{"id":"https://openalex.org/W4388739184","doi":"https://doi.org/10.1109/jstsp.2023.3333552","title":"StableFace: Analyzing and Improving Motion Stability for Talking Face Generation","display_name":"StableFace: Analyzing and Improving Motion Stability for Talking Face Generation","publication_year":2023,"publication_date":"2023-11-01","ids":{"openalex":"https://openalex.org/W4388739184","doi":"https://doi.org/10.1109/jstsp.2023.3333552"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2023.3333552","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2023.3333552","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103020698","display_name":"Jun Ling","orcid":"https://orcid.org/0000-0001-7260-7141"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Ling","raw_affiliation_strings":["Institute of Image Communication and Network Engineering, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-7260-7141","affiliations":[{"raw_affiliation_string":"Institute of Image Communication and Network Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101522530","display_name":"Xu Tan","orcid":"https://orcid.org/0000-0001-5631-0639"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Tan","raw_affiliation_strings":["Microsoft Research Asia (MSRA), Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5631-0639","affiliations":[{"raw_affiliation_string":"Microsoft Research Asia (MSRA), Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101849127","display_name":"Liyang Chen","orcid":"https://orcid.org/0000-0001-6985-8281"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liyang Chen","raw_affiliation_strings":["Tsinghua University, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-6985-8281","affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102903512","display_name":"Runnan Li","orcid":"https://orcid.org/0000-0002-0922-003X"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runnan Li","raw_affiliation_strings":["Microsoft Cloud+AI, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-0922-003X","affiliations":[{"raw_affiliation_string":"Microsoft Cloud+AI, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100649616","display_name":"Yuchao Zhang","orcid":"https://orcid.org/0009-0006-0191-3612"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchao Zhang","raw_affiliation_strings":["Microsoft Cloud+AI, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-0191-3612","affiliations":[{"raw_affiliation_string":"Microsoft Cloud+AI, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100329353","display_name":"Sheng Zhao","orcid":"https://orcid.org/0000-0002-9624-5381"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sheng Zhao","raw_affiliation_strings":["Microsoft Cloud+AI, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9624-5381","affiliations":[{"raw_affiliation_string":"Microsoft Cloud+AI, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002404808","display_name":"Li Song","orcid":"https://orcid.org/0000-0002-7124-5182"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Song","raw_affiliation_strings":["Institute of Image Communication and Network Engineering, Shanghai Jiao Tong University, Shanghai, China","MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-7124-5182","affiliations":[{"raw_affiliation_string":"Institute of Image Communication and Network Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.6842,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.86778981,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"17","issue":"6","first_page":"1232","last_page":"1247"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9805999994277954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6080425381660461},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.6019114851951599},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5181278586387634},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5043700933456421},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4944610595703125},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.48500415682792664},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.20935550332069397}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6080425381660461},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.6019114851951599},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5181278586387634},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5043700933456421},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4944610595703125},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.48500415682792664},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.20935550332069397},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jstsp.2023.3333552","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2023.3333552","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4950456183","display_name":null,"funder_award_id":"BP0719010","funder_id":"https://openalex.org/F4320327912","funder_display_name":"Higher Education Discipline Innovation Project"},{"id":"https://openalex.org/G6360183275","display_name":null,"funder_award_id":"22DZ2229005","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"}],"funders":[{"id":"https://openalex.org/F4320307764","display_name":"Microsoft","ror":"https://ror.org/00d0nc645"},{"id":"https://openalex.org/F4320321885","display_name":"Science and Technology Commission of Shanghai Municipality","ror":"https://ror.org/03kt66j61"},{"id":"https://openalex.org/F4320327912","display_name":"Higher Education Discipline Innovation Project","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":82,"referenced_works":["https://openalex.org/W1981693481","https://openalex.org/W2015143272","https://openalex.org/W2054080206","https://openalex.org/W2079812794","https://openalex.org/W2119692610","https://openalex.org/W2130495284","https://openalex.org/W2133665775","https://openalex.org/W2136831404","https://openalex.org/W2162220380","https://openalex.org/W2169574628","https://openalex.org/W2194775991","https://openalex.org/W2237250383","https://openalex.org/W2301937176","https://openalex.org/W2331128040","https://openalex.org/W2604379605","https://openalex.org/W2738406145","https://openalex.org/W2769666294","https://openalex.org/W2806833697","https://openalex.org/W2884460600","https://openalex.org/W2886787375","https://openalex.org/W2902346020","https://openalex.org/W2902836694","https://openalex.org/W2944294033","https://openalex.org/W2949662773","https://openalex.org/W2960274051","https://openalex.org/W2963081548","https://openalex.org/W2963290645","https://openalex.org/W2963317244","https://openalex.org/W2965644659","https://openalex.org/W2979894294","https://openalex.org/W3006410788","https://openalex.org/W3022710784","https://openalex.org/W3035318015","https://openalex.org/W3068510429","https://openalex.org/W3081492798","https://openalex.org/W3086926995","https://openalex.org/W3087121792","https://openalex.org/W3091550498","https://openalex.org/W3096609285","https://openalex.org/W3097792222","https://openalex.org/W3099047215","https://openalex.org/W3101631197","https://openalex.org/W3103801904","https://openalex.org/W3104792420","https://openalex.org/W3161892325","https://openalex.org/W3174763799","https://openalex.org/W3175779516","https://openalex.org/W3180794345","https://openalex.org/W3186090335","https://openalex.org/W3192148059","https://openalex.org/W3195529437","https://openalex.org/W3201844719","https://openalex.org/W3204715535","https://openalex.org/W3205994442","https://openalex.org/W3207849023","https://openalex.org/W3208601549","https://openalex.org/W3211147706","https://openalex.org/W3214444848","https://openalex.org/W4200174933","https://openalex.org/W4200630629","https://openalex.org/W4206517532","https://openalex.org/W4206704860","https://openalex.org/W4214626920","https://openalex.org/W4214738329","https://openalex.org/W4224310085","https://openalex.org/W4281730245","https://openalex.org/W4296438045","https://openalex.org/W4312959196","https://openalex.org/W4313147094","https://openalex.org/W4361766807","https://openalex.org/W6687566353","https://openalex.org/W6737896281","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6763832098","https://openalex.org/W6767264202","https://openalex.org/W6781874425","https://openalex.org/W6784094891","https://openalex.org/W6784333009","https://openalex.org/W6784470723","https://openalex.org/W6803277327","https://openalex.org/W6849049266"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"While":[0],"previous":[1,234],"methods":[2],"for":[3],"speech-driven":[4],"talking":[5,40,106,197,227],"face":[6,41,74,107,114,144,198],"generation":[7,126],"have":[8,24],"shown":[9],"significant":[10],"advances":[11],"in":[12,103,124,149,164],"improving":[13],"the":[14,20,36,51,62,78,104,112,125,142,150,157,161,168,212,219,222],"visual":[15],"and":[16,47,81,84,118],"lip-sync":[17],"quality":[18,38,232],"of":[19,39,121,160,196,221],"synthesized":[21,105],"videos,":[22],"they":[23],"paid":[25],"less":[26],"attention":[27],"to":[28,49,60,76,89,101,140,146,156,166,171,179,209,233],"lip":[29],"motion":[30,45,63,91,194,213],"jitters":[31,102,110,148,195],"which":[32],"can":[33,99,192],"substantially":[34],"undermine":[35],"perceived":[37],"videos.":[42],"What":[43],"causes":[44],"jitters,":[46],"how":[48],"mitigate":[50],"problem?":[52],"In":[53,183],"this":[54],"article,":[55],"we":[56,129,200],"conduct":[57],"systematic":[58],"analyses":[59],"investigate":[61],"jittering":[64],"problem":[65],"based":[66],"on":[67,225],"a":[68,119,135],"state-of-the-art":[69],"pipeline":[70],"that":[71,96,191],"utilizes":[72],"3D":[73,143],"representations":[75,145],"bridge":[77],"input":[79,113,158],"audio":[80],"output":[82],"video,":[83,108,199],"implement":[85],"several":[86,97],"effective":[87,132],"designs":[88],"improve":[90],"stability.":[92],"This":[93],"study":[94],"finds":[95],"factors":[98],"lead":[100],"including":[109],"from":[111],"representations,":[115],"training-inference":[116],"mismatch,":[117],"lack":[120],"dependency":[122],"modeling":[123],"network.":[127],"Accordingly,":[128],"propose":[130],"three":[131],"solutions:":[133],"1)":[134],"Gaussian-based":[136],"adaptive":[137],"smoothing":[138],"module":[139],"smooth":[141],"eliminate":[147],"input;":[151],"2)":[152],"augmented":[153],"erosions":[154],"added":[155],"data":[159],"neural":[162],"renderer":[163],"training":[165],"simulate":[167],"inference":[169],"distortion":[170],"reduce":[172],"mismatch;":[173],"3)":[174],"an":[175,202],"audio-fused":[176],"transformer":[177],"generator":[178],"model":[180],"inter-frame":[181],"dependency.":[182],"addition,":[184],"considering":[185],"there":[186],"is":[187],"no":[188],"off-the-shelf":[189],"metric":[190,204],"measures":[193],"devise":[201],"objective":[203],"(Motion":[205],"Stability":[206],"Index,":[207],"MSI)":[208],"quantitatively":[210],"measure":[211],"jitters.":[214],"Extensive":[215],"experimental":[216],"results":[217],"show":[218],"superiority":[220],"proposed":[223],"method":[224],"motion-stable":[226],"video":[228],"generation,":[229],"with":[230],"superior":[231],"systems.":[235]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
