{"id":"https://openalex.org/W3177150198","doi":"https://doi.org/10.1109/tmm.2021.3091863","title":"Multimodal Learning for Temporally Coherent Talking Face Generation With Articulator Synergy","display_name":"Multimodal Learning for Temporally Coherent Talking Face Generation With Articulator Synergy","publication_year":2021,"publication_date":"2021-06-28","ids":{"openalex":"https://openalex.org/W3177150198","doi":"https://doi.org/10.1109/tmm.2021.3091863","mag":"3177150198"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2021.3091863","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2021.3091863","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089799495","display_name":"Lingyun Yu","orcid":"https://orcid.org/0000-0001-6403-761X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lingyun Yu","raw_affiliation_strings":["Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078162380","display_name":"Hongtao Xie","orcid":"https://orcid.org/0000-0002-6249-5315"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hongtao Xie","raw_affiliation_strings":["Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5089799495"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.3254,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.90297225,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"24","issue":null,"first_page":"2950","last_page":"2962"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8538450598716736},{"id":"https://openalex.org/keywords/articulator","display_name":"Articulator","score":0.8253788352012634},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6212155818939209},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5826698541641235},{"id":"https://openalex.org/keywords/landmark","display_name":"Landmark","score":0.5730209946632385},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5677788853645325},{"id":"https://openalex.org/keywords/optical-flow","display_name":"Optical flow","score":0.5420800447463989},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4855906367301941},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.4231698215007782},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.41741225123405457},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3846599757671356},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.14393240213394165},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07868391275405884}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8538450598716736},{"id":"https://openalex.org/C2778415344","wikidata":"https://www.wikidata.org/wiki/Q443216","display_name":"Articulator","level":2,"score":0.8253788352012634},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6212155818939209},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5826698541641235},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.5730209946632385},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5677788853645325},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.5420800447463989},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4855906367301941},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.4231698215007782},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.41741225123405457},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3846599757671356},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.14393240213394165},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07868391275405884},{"id":"https://openalex.org/C29694066","wikidata":"https://www.wikidata.org/wiki/Q118301","display_name":"Orthodontics","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2021.3091863","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2021.3091863","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6299999952316284,"display_name":"No poverty","id":"https://metadata.un.org/sdg/1"}],"awards":[{"id":"https://openalex.org/G1484805094","display_name":null,"funder_award_id":"WK3480000011","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G3316061764","display_name":null,"funder_award_id":"62022076","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3722848311","display_name":null,"funder_award_id":"62102127","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G472569701","display_name":null,"funder_award_id":"62032006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6179918720","display_name":null,"funder_award_id":"U1936210","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7594342687","display_name":null,"funder_award_id":"2020M682035","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1896788142","https://openalex.org/W1916406603","https://openalex.org/W2000540279","https://openalex.org/W2049686551","https://openalex.org/W2064076387","https://openalex.org/W2093450784","https://openalex.org/W2099471712","https://openalex.org/W2132733106","https://openalex.org/W2145023731","https://openalex.org/W2157331557","https://openalex.org/W2296495263","https://openalex.org/W2469524028","https://openalex.org/W2560474170","https://openalex.org/W2593414223","https://openalex.org/W2598638573","https://openalex.org/W2738406145","https://openalex.org/W2794857359","https://openalex.org/W2806833697","https://openalex.org/W2884460600","https://openalex.org/W2904787397","https://openalex.org/W2913664580","https://openalex.org/W2930178207","https://openalex.org/W2944294033","https://openalex.org/W2960274051","https://openalex.org/W2962865004","https://openalex.org/W2962960500","https://openalex.org/W2963073614","https://openalex.org/W2963081548","https://openalex.org/W2963091558","https://openalex.org/W2963290645","https://openalex.org/W2963495263","https://openalex.org/W2963800363","https://openalex.org/W2964559396","https://openalex.org/W2982624843","https://openalex.org/W3006410788","https://openalex.org/W3017343282","https://openalex.org/W3046890131","https://openalex.org/W3093340621","https://openalex.org/W3098341861","https://openalex.org/W3107666850","https://openalex.org/W3108240585","https://openalex.org/W3116298410","https://openalex.org/W3135925326","https://openalex.org/W3180770160","https://openalex.org/W3183999072","https://openalex.org/W4210657261","https://openalex.org/W4289665794","https://openalex.org/W6603616073","https://openalex.org/W6737896281","https://openalex.org/W6748181857","https://openalex.org/W6749825310","https://openalex.org/W6751750676","https://openalex.org/W6753914649","https://openalex.org/W6761716443","https://openalex.org/W6767264202","https://openalex.org/W6774337803"],"related_works":["https://openalex.org/W2034310431","https://openalex.org/W2724655958","https://openalex.org/W2033140727","https://openalex.org/W4250529536","https://openalex.org/W1963633157","https://openalex.org/W4290036070","https://openalex.org/W2412408187","https://openalex.org/W2171350946","https://openalex.org/W2047955402","https://openalex.org/W3016680418"],"abstract_inverted_index":{"Talking":[0],"face":[1],"generation":[2,43,235],"is":[3,158,207,219,237,248,279],"a":[4,9,41,105,126,152,202,233],"demanding":[5],"task":[6],"to":[7,32,39,49,129,160,189,221,228,239,250],"synthesize":[8],"high":[10],"quality":[11],"video":[12,102,200],"with":[13],"accurate":[14],"lip":[15,87,181],"synchronization":[16],"and":[17,30,54,96,132,145,173,195,243,258,272],"rhythmic":[18],"head":[19],"motion.":[20],"Any":[21],"subtle":[22],"artifacts":[23],"could":[24],"be":[25],"sensitively":[26],"captured":[27],"by":[28,137,167],"humans":[29],"lead":[31],"poor":[33],"visual":[34],"quality.":[35],"Existing":[36],"methods":[37,59],"tend":[38],"employ":[40],"conditional":[42],"solution,":[44],"which":[45],"introduces":[46],"facial":[47,64,276],"landmarks":[48],"bridge":[50],"the":[51,75,91,110,114,162,186,211,216,223,244,255,259],"input":[52],"information":[53],"output":[55],"videos.":[56],"However,":[57],"these":[58,119],"always":[60],"suffer":[61],"from":[62],"unrealistic":[63],"animations,":[65],"because":[66],"1)":[67],"they":[68,84,99],"only":[69,85],"take":[70],"single-mode":[71],"input,":[72],"but":[73,89,108],"ignore":[74,90,109],"complementarity":[76],"of":[77,164,178],"multimodal":[78,139,169],"inputs":[79,170],"for":[80,149,198],"lip-sync":[81,271],"improvement;":[82],"2)":[83],"explore":[86,185],"movements,":[88],"articulator":[92,141,191],"synergy":[93,192],"between":[94,226,254],"lips":[95,194],"jaw;":[97],"3)":[98],"generate":[100,130,274],"each":[101],"frame":[103],"in":[104,121],"temporal-independent":[106],"way,":[107],"temporal":[111,224],"continuity":[112,225],"among":[113,193],"entire":[115],"video.":[116],"To":[117],"address":[118],"limitations,":[120],"this":[122],"paper,":[123],"we":[124,183],"present":[125],"novel":[127,153],"method":[128],"realistic":[131,199],"temporally":[133],"coherent":[134],"talking":[135],"heads":[136],"considering":[138,180],"inputs,":[140],"synergy,":[142],"inter-frame":[143,230],"consistency":[144,253],"intra-frame":[146,252],"consistency.":[147,231],"Firstly,":[148],"landmark":[150,165],"prediction,":[151],"Multiple":[154],"Synergy":[155],"Network":[156,205],"(MSN)":[157],"proposed":[159,208,238],"improve":[161],"accuracy":[163],"prediction":[166],"incorporating":[168],"(i.e.,":[171],"audio":[172],"text":[174],"inputs).":[175],"Besides,":[176],"instead":[177],"merely":[179],"landmarks,":[182],"also":[184],"jaw":[187],"movements":[188],"ensure":[190,229,251],"jaw.":[196],"Secondly,":[197],"generation,":[201],"Video":[203],"Consistency":[204],"(VCN)":[206],"conditioned":[209],"on":[210,270],"predicted":[212],"landmarks.":[213],"In":[214],"VCN,":[215],"optical":[217],"flow":[218],"adopted":[220],"model":[222],"frames":[227],"Meanwhile,":[232],"mouth":[234,241,246,256],"branch":[236],"enhance":[240],"texture":[242],"corresponding":[245],"mask":[247],"employed":[249],"area":[257],"others.":[260],"Extensive":[261],"experiments":[262],"demonstrate":[263],"that":[264],"our":[265],"approach":[266],"exhibits":[267],"excellent":[268],"superiority":[269],"can":[273],"photo-realistic":[275],"animations.":[277],"Project":[278],"available":[280],"at":[281],"http://imcc.ustc.edu.cn/project/tfgen/.":[282]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
