{"id":"https://openalex.org/W4417466228","doi":"https://doi.org/10.48550/arxiv.2512.14056","title":"FacEDiT: Unified Talking Face Editing and Generation via Facial Motion Infilling","display_name":"FacEDiT: Unified Talking Face Editing and Generation via Facial Motion Infilling","publication_year":2025,"publication_date":"2025-12-16","ids":{"openalex":"https://openalex.org/W4417466228","doi":"https://doi.org/10.48550/arxiv.2512.14056"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2512.14056","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.14056","pdf_url":"https://arxiv.org/pdf/2512.14056","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.14056","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103159153","display_name":"Sung-Bin Kim","orcid":"https://orcid.org/0009-0005-8455-3298"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sung-Bin, Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002418613","display_name":"Joon\u2010Hyuk Chang","orcid":"https://orcid.org/0000-0003-2610-2323"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Joohyun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004717608","display_name":"David Harwath","orcid":"https://orcid.org/0000-0003-0206-0253"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Harwath, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5078114111","display_name":"Tae-Hyun Oh","orcid":"https://orcid.org/0000-0003-0468-1571"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oh, Tae-Hyun","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5103159153"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.7402999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.7402999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2320999950170517,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11118","display_name":"Evolutionary Psychology and Human Behavior","score":0.0032999999821186066,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.6085000038146973},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.5637000203132629},{"id":"https://openalex.org/keywords/image-editing","display_name":"Image editing","score":0.5514000058174133},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.48989999294281006},{"id":"https://openalex.org/keywords/optical-flow","display_name":"Optical flow","score":0.4027000069618225},{"id":"https://openalex.org/keywords/facial-recognition-system","display_name":"Facial recognition system","score":0.3379000127315521},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.3203999996185303}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7669000029563904},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.6085000038146973},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.566100001335144},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.5637000203132629},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.5514000058174133},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48989999294281006},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4690000116825104},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.4027000069618225},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3547999858856201},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.3379000127315521},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.3203999996185303},{"id":"https://openalex.org/C54654163","wikidata":"https://www.wikidata.org/wiki/Q5428359","display_name":"Face hallucination","level":5,"score":0.29809999465942383},{"id":"https://openalex.org/C4641261","wikidata":"https://www.wikidata.org/wiki/Q11681085","display_name":"Face detection","level":4,"score":0.28850001096725464},{"id":"https://openalex.org/C108154423","wikidata":"https://www.wikidata.org/wiki/Q1469792","display_name":"Salience (neuroscience)","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.2734000086784363},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.272599995136261},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.2596000134944916}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2512.14056","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.14056","pdf_url":"https://arxiv.org/pdf/2512.14056","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.14056","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.14056","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.14056","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.14056","pdf_url":"https://arxiv.org/pdf/2512.14056","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4417466228.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Talking":[0],"face":[1,4,55,141,159,189],"editing":[2,131,160],"and":[3,87,95,101,114,121,147,161,180],"generation":[5,94,162],"have":[6],"often":[7],"been":[8],"studied":[9],"as":[10,21,25,40,48,98,164],"distinct":[11],"problems.":[12],"In":[13,110],"this":[14,59],"work,":[15],"we":[16,61,133],"propose":[17,62],"viewing":[18],"both":[19,92],"not":[20],"separate":[22],"tasks":[23],"but":[24],"subtasks":[26,165],"of":[27,52,128,166],"a":[28,41,49,64,129],"unifying":[29,50],"formulation,":[30],"speech-conditional":[31,65,167],"facial":[32,37,81,174],"motion":[33,38,168],"infilling.":[34],"We":[35],"explore":[36],"infilling":[39],"self-supervised":[42],"pretext":[43],"task":[44],"that":[45,157],"also":[46],"serves":[47],"formulation":[51,90],"dynamic":[53],"talking":[54,140,158,188],"synthesis.":[56],"To":[57,124],"instantiate":[58],"idea,":[60],"FacEDiT,":[63],"Diffusion":[66],"Transformer":[67],"trained":[68],"with":[69,107,150,176],"flow":[70],"matching.":[71],"Inspired":[72],"by":[73],"masked":[74,80],"autoencoders,":[75],"FacEDiT":[76,170],"learns":[77],"to":[78,187],"synthesize":[79],"motions":[82,86],"conditioned":[83],"on":[84],"surrounding":[85],"speech.":[88],"This":[89],"enables":[91],"localized":[93],"edits,":[96],"such":[97],"substitution,":[99],"insertion,":[100],"deletion,":[102],"while":[103,184],"ensuring":[104],"seamless":[105],"transitions":[106],"unedited":[108],"regions.":[109],"addition,":[111],"biased":[112],"attention":[113],"temporal":[115],"smoothness":[116],"constraints":[117],"enhance":[118],"boundary":[119],"continuity":[120,183],"lip":[122],"synchronization.":[123],"address":[125],"the":[126,136],"lack":[127],"standard":[130],"benchmark,":[132],"introduce":[134],"FacEDiTBench,":[135],"first":[137],"dataset":[138],"for":[139],"editing,":[142],"featuring":[143],"diverse":[144],"edit":[145],"types":[146],"lengths,":[148],"along":[149],"new":[151],"evaluation":[152],"metrics.":[153],"Extensive":[154],"experiments":[155],"validate":[156],"emerge":[163],"infilling;":[169],"produces":[171],"accurate,":[172],"speech-aligned":[173],"edits":[175],"strong":[177],"identity":[178],"preservation":[179],"smooth":[181],"visual":[182],"generalizing":[185],"effectively":[186],"generation.":[190]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-12-18T00:00:00"}
