{"id":"https://openalex.org/W4387968060","doi":"https://doi.org/10.1145/3581783.3611765","title":"Context-Aware Talking-Head Video Editing","display_name":"Context-Aware Talking-Head Video Editing","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4387968060","doi":"https://doi.org/10.1145/3581783.3611765"},"language":"en","primary_location":{"id":"doi:10.1145/3581783.3611765","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3611765","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101649664","display_name":"Songlin Yang","orcid":"https://orcid.org/0000-0003-3403-376X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Songlin Yang","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100757829","display_name":"Wei Wang","orcid":"https://orcid.org/0000-0002-8598-0831"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Wang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103020698","display_name":"Jun Ling","orcid":"https://orcid.org/0000-0001-7260-7141"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Ling","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042160852","display_name":"Bo Peng","orcid":"https://orcid.org/0000-0002-9014-7369"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Peng","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101522530","display_name":"Xu Tan","orcid":"https://orcid.org/0000-0001-5631-0639"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Tan","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017743261","display_name":"Jing Dong","orcid":"https://orcid.org/0000-0002-2763-7832"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Dong","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101649664"],"corresponding_institution_ids":["https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":1.981,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.87600744,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"7718","last_page":"7727"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8677124977111816},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.7275701761245728},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5271223187446594},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.511861264705658},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5058664679527283},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3911433815956116},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.36717432737350464}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8677124977111816},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.7275701761245728},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5271223187446594},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.511861264705658},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5058664679527283},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3911433815956116},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.36717432737350464}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581783.3611765","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3611765","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5886109581","display_name":null,"funder_award_id":"Grant No. 2021YFC3320103","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W219197677","https://openalex.org/W1964202597","https://openalex.org/W2015143272","https://openalex.org/W2237250383","https://openalex.org/W2486034530","https://openalex.org/W2604379605","https://openalex.org/W2626696598","https://openalex.org/W2737658251","https://openalex.org/W2738406145","https://openalex.org/W2745771616","https://openalex.org/W2886934227","https://openalex.org/W2914217321","https://openalex.org/W2944294033","https://openalex.org/W2949662773","https://openalex.org/W2960274051","https://openalex.org/W2963081548","https://openalex.org/W2963290645","https://openalex.org/W2964559396","https://openalex.org/W2979894294","https://openalex.org/W2981263323","https://openalex.org/W2999905431","https://openalex.org/W3019952993","https://openalex.org/W3034211329","https://openalex.org/W3037784242","https://openalex.org/W3081492798","https://openalex.org/W3089177030","https://openalex.org/W3101631197","https://openalex.org/W3104792420","https://openalex.org/W3109114891","https://openalex.org/W3114462840","https://openalex.org/W3153220274","https://openalex.org/W3174763799","https://openalex.org/W3175342695","https://openalex.org/W3175779516","https://openalex.org/W3180794345","https://openalex.org/W3182714435","https://openalex.org/W3186090335","https://openalex.org/W3192148059","https://openalex.org/W3197199219","https://openalex.org/W3205688125","https://openalex.org/W3205994442","https://openalex.org/W3211147706","https://openalex.org/W4200150166","https://openalex.org/W4214731463","https://openalex.org/W4312578889","https://openalex.org/W4312959196","https://openalex.org/W6779809370"],"related_works":["https://openalex.org/W2371381961","https://openalex.org/W291250033","https://openalex.org/W2035757446","https://openalex.org/W2008385118","https://openalex.org/W880955280","https://openalex.org/W2215755978","https://openalex.org/W2106647072","https://openalex.org/W4246858109","https://openalex.org/W2172753644","https://openalex.org/W54172855"],"abstract_inverted_index":{"Talking-head":[0],"video":[1,15,36,96,106,115,189,205],"editing":[2,30,242],"aims":[3],"to":[4,108,131,164,183,199,227],"efficiently":[5,146,239],"insert,":[6],"delete,":[7],"and":[8,44,55,72,87,92,123,134,149,168,173,203,219,248],"substitute":[9],"the":[10,79,105,155,170,201,208],"word":[11],"of":[12,69,74,82],"a":[13,17,110,161,176,187,195,215,229],"pre-recorded":[14],"through":[16],"text":[18],"transcript":[19],"editor.":[20],"The":[21],"key":[22],"challenge":[23],"for":[24,95,113,224],"this":[25,100,129],"task":[26],"is":[27],"obtaining":[28],"an":[29,141],"model":[31],"that":[32,64,145,236],"generates":[33],"new":[34],"talking-head":[35,114],"clips":[37],"which":[38,117],"simultaneously":[39],"have":[40],"accurate":[41],"lip":[42,85,249],"synchronization":[43],"motion":[45,121,132,151,210],"smoothness.":[46,125],"Previous":[47],"approaches,":[48],"including":[49],"3DMM-based":[50],"(3D":[51],"Morphable":[52],"Model)":[53],"methods":[54],"NeRF-based":[56],"(Neural":[57],"Radiance":[58],"Field)":[59],"methods,":[60],"are":[61],"sub-optimal":[62],"in":[63],"they":[65],"either":[66],"require":[67],"minutes":[68],"source":[70],"videos":[71],"days":[73],"training":[75],"time":[76],"or":[77],"lack":[78],"disentangled":[80,120],"control":[81],"verbal":[83],"(e.g.,":[84,89],"motion)":[86],"non-verbal":[88],"head":[90,217],"pose":[91],"expression)":[93],"representations":[94],"clip":[97],"insertion.":[98],"In":[99],"work,":[101],"we":[102,127],"fully":[103],"utilize":[104],"context":[107,166],"design":[109,140],"novel":[111,184],"framework":[112,130],"editing,":[116],"achieves":[118,240],"efficiency,":[119,172],"control,":[122],"sequential":[124],"Specifically,":[126],"decompose":[128],"prediction":[133,143,171],"motion-conditioned":[135],"rendering:":[136],"(1)":[137],"We":[138,192],"first":[139],"animation":[142],"module":[144,159,198,213],"obtains":[147],"smooth":[148],"lip-sync":[150],"sequences":[152],"conditioned":[153],"on":[154],"driven":[156],"speech.":[157],"This":[158,212],"adopts":[160,214],"non-autoregressive":[162],"network":[163],"obtain":[165,228],"prior":[167,179],"improve":[169],"it":[174],"learns":[175],"speech-animation":[177],"mapping":[178],"with":[180,244],"better":[181],"generalization":[182],"speech":[185],"from":[186],"multi-identity":[188],"dataset.":[190],"(2)":[191],"then":[193],"introduce":[194],"neural":[196],"rendering":[197,231],"synthesize":[200],"photo-realistic":[202],"full-head":[204],"frames":[206,223],"given":[207],"predicted":[209],"sequence.":[211],"pre-trained":[216],"topology":[218],"uses":[220],"only":[221],"few":[222],"efficient":[225],"fine-tuning":[226],"person-specific":[230],"model.":[232],"Extensive":[233],"experiments":[234],"demonstrate":[235],"our":[237],"method":[238],"smoother":[241],"results":[243],"higher":[245],"image":[246],"quality":[247],"accuracy":[250],"using":[251],"less":[252],"data":[253],"than":[254],"previous":[255],"methods.":[256]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":5}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
