{"id":"https://openalex.org/W4409797546","doi":"https://doi.org/10.1142/s021800142557006x","title":"One-Step Multi-Frame Inpainting Framework for Real-Time Lip-Sync Digital Human Generation","display_name":"One-Step Multi-Frame Inpainting Framework for Real-Time Lip-Sync Digital Human Generation","publication_year":2025,"publication_date":"2025-04-26","ids":{"openalex":"https://openalex.org/W4409797546","doi":"https://doi.org/10.1142/s021800142557006x"},"language":"en","primary_location":{"id":"doi:10.1142/s021800142557006x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s021800142557006x","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058864361","display_name":"Yijun Bei","orcid":"https://orcid.org/0000-0001-5720-6374"},"institutions":[{"id":"https://openalex.org/I159389169","display_name":"Ningbo University of Technology","ror":"https://ror.org/037dym702","country_code":"CN","type":"education","lineage":["https://openalex.org/I159389169"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yijun Bei","raw_affiliation_strings":["School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China"],"raw_orcid":"https://orcid.org/0000-0001-5720-6374","affiliations":[{"raw_affiliation_string":"School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I159389169"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yunze Qi","orcid":"https://orcid.org/0009-0003-4680-2842"},"institutions":[{"id":"https://openalex.org/I159389169","display_name":"Ningbo University of Technology","ror":"https://ror.org/037dym702","country_code":"CN","type":"education","lineage":["https://openalex.org/I159389169"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunze Qi","raw_affiliation_strings":["School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China"],"raw_orcid":"https://orcid.org/0009-0003-4680-2842","affiliations":[{"raw_affiliation_string":"School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I159389169"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004651483","display_name":"Hengrui Lou","orcid":null},"institutions":[{"id":"https://openalex.org/I159389169","display_name":"Ningbo University of Technology","ror":"https://ror.org/037dym702","country_code":"CN","type":"education","lineage":["https://openalex.org/I159389169"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hengrui Lou","raw_affiliation_strings":["School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China"],"raw_orcid":"https://orcid.org/0009-0001-5765-4094","affiliations":[{"raw_affiliation_string":"School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I159389169"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010410284","display_name":"Erteng Liu","orcid":"https://orcid.org/0009-0005-6502-4454"},"institutions":[{"id":"https://openalex.org/I159389169","display_name":"Ningbo University of Technology","ror":"https://ror.org/037dym702","country_code":"CN","type":"education","lineage":["https://openalex.org/I159389169"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Erteng Liu","raw_affiliation_strings":["School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China"],"raw_orcid":"https://orcid.org/0009-0005-6502-4454","affiliations":[{"raw_affiliation_string":"School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I159389169"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ke Wang","orcid":"https://orcid.org/0009-0006-0282-2376"},"institutions":[{"id":"https://openalex.org/I159389169","display_name":"Ningbo University of Technology","ror":"https://ror.org/037dym702","country_code":"CN","type":"education","lineage":["https://openalex.org/I159389169"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ke Wang","raw_affiliation_strings":["School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China"],"raw_orcid":"https://orcid.org/0009-0006-0282-2376","affiliations":[{"raw_affiliation_string":"School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I159389169"]}]},{"author_position":"last","author":{"id":null,"display_name":"Hongchang Zhang","orcid":"https://orcid.org/0009-0008-0407-2497"},"institutions":[{"id":"https://openalex.org/I159389169","display_name":"Ningbo University of Technology","ror":"https://ror.org/037dym702","country_code":"CN","type":"education","lineage":["https://openalex.org/I159389169"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongchang Zhang","raw_affiliation_strings":["School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China"],"raw_orcid":"https://orcid.org/0009-0008-0407-2497","affiliations":[{"raw_affiliation_string":"School of Software Technology Zhejiang University, Ningbo 310048, P.\u00a0R.\u00a0China","institution_ids":["https://openalex.org/I159389169"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5058864361"],"corresponding_institution_ids":["https://openalex.org/I159389169"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0882097,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"39","issue":"09","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9056000113487244,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9056000113487244,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inpainting","display_name":"Inpainting","score":0.8210296630859375},{"id":"https://openalex.org/keywords/sync","display_name":"sync","score":0.6927814483642578},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6839872002601624},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6823798418045044},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6629937887191772},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6494066715240479},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.4134526252746582},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.39052534103393555},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0853148102760315}],"concepts":[{"id":"https://openalex.org/C11727466","wikidata":"https://www.wikidata.org/wiki/Q1628157","display_name":"Inpainting","level":3,"score":0.8210296630859375},{"id":"https://openalex.org/C3913047","wikidata":"https://www.wikidata.org/wiki/Q1956265","display_name":"sync","level":3,"score":0.6927814483642578},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6839872002601624},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6823798418045044},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6629937887191772},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6494066715240479},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.4134526252746582},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.39052534103393555},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0853148102760315}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s021800142557006x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s021800142557006x","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1963926913","https://openalex.org/W1975781079","https://openalex.org/W2145038566","https://openalex.org/W2194775991","https://openalex.org/W2341528187","https://openalex.org/W2395639500","https://openalex.org/W2593414223","https://openalex.org/W2605048851","https://openalex.org/W2883111419","https://openalex.org/W2884585870","https://openalex.org/W2944294033","https://openalex.org/W2979503749","https://openalex.org/W2996833883","https://openalex.org/W3081492798","https://openalex.org/W3090234790","https://openalex.org/W3099284785","https://openalex.org/W3168825659","https://openalex.org/W3186090335","https://openalex.org/W3197199219","https://openalex.org/W3204680331","https://openalex.org/W3211147706","https://openalex.org/W4283732315","https://openalex.org/W4312444931","https://openalex.org/W4312933868","https://openalex.org/W4312990833","https://openalex.org/W4319300274","https://openalex.org/W4382240211","https://openalex.org/W4386065999","https://openalex.org/W4386071653","https://openalex.org/W4386075487","https://openalex.org/W4390730260","https://openalex.org/W4391281763","https://openalex.org/W4393159974","https://openalex.org/W4394597155","https://openalex.org/W4395463166","https://openalex.org/W4397003263","https://openalex.org/W4399344807","https://openalex.org/W4401054635","https://openalex.org/W4402726971","https://openalex.org/W4402727013","https://openalex.org/W4403081627","https://openalex.org/W4404124911"],"related_works":["https://openalex.org/W2017457812","https://openalex.org/W3178025616","https://openalex.org/W2060947339","https://openalex.org/W2131831293","https://openalex.org/W2946160871","https://openalex.org/W3035059915","https://openalex.org/W1995073329","https://openalex.org/W425542480","https://openalex.org/W49967185","https://openalex.org/W2107727507"],"abstract_inverted_index":{"In":[0],"recent":[1],"times,":[2],"audio-driven":[3],"lip-synching":[4],"generation":[5,45],"for":[6],"digital":[7],"humans":[8],"has":[9,33,177],"attracted":[10],"considerable":[11],"attention.":[12],"However,":[13],"the":[14,30,88,199],"prevailing":[15],"methodologies":[16],"frequently":[17],"encounter":[18],"challenges":[19],"pertaining":[20],"to":[21,120,165],"elevated":[22],"computational":[23],"complexity":[24],"and":[25,54,59,78,107,132,145,149,170,185,191,209],"deficient":[26],"real-time":[27,211],"performance.":[28],"Although":[29],"MuseTalk":[31],"framework":[32,72,201],"achieved":[34],"notable":[35],"progress":[36],"in":[37],"inference":[38,164],"efficiency":[39],"through":[40],"its":[41,214],"end-to-end,":[42],"latent-space-based":[43],"single-step":[44],"algorithm,":[46],"it":[47],"still":[48],"suffers":[49],"from":[50],"noticeable":[51],"lip":[52,60,90,105,125,168,173,207],"jitter":[53],"insufficient":[55],"synchronization":[56,106],"between":[57],"audio":[58,100],"movements.":[61],"To":[62,127],"address":[63],"these":[64],"limitations,":[65],"we":[66,112,134],"propose":[67,135],"an":[68,150],"enhanced":[69,151],"multi-frame":[70,94,116],"inpainting":[71],"that":[73,198],"integrates":[74],"Variational":[75],"Autoencoders":[76],"(VAE)":[77],"a":[79,114,136,157,181],"multi-scale":[80],"U-Net":[81],"architecture.":[82],"Specifically,":[83],"our":[84],"approach":[85],"directly":[86],"synthesizes":[87],"occluded":[89],"region":[91],"by":[92],"leveraging":[93],"visual":[95,204],"references":[96],"combined":[97],"with":[98],"corresponding":[99],"embeddings,":[101],"thereby":[102],"effectively":[103],"improving":[104],"maintaining":[108],"identity":[109],"consistency.":[110],"Furthermore,":[111],"introduce":[113],"landmark-guided":[115],"sampling":[117],"strategy":[118],"designed":[119],"enhance":[121],"model":[122,176],"attention":[123],"towards":[124],"dynamics.":[126],"facilitate":[128],"deeper":[129],"feature":[130,139],"extraction":[131],"fusion,":[133],"hierarchical":[137],"latent-space":[138],"fusion":[140],"network":[141],"(FusionNet),":[142],"incorporating":[143],"global":[144],"local":[146],"residual":[147],"connections":[148],"Convolutional":[152],"Block":[153],"Attention":[154],"Module.":[155],"Additionally,":[156],"frame":[158],"interpolation":[159],"technique":[160],"is":[161],"employed":[162],"during":[163],"further":[166],"smooth":[167],"movements":[169],"significantly":[171],"mitigate":[172],"jitter.":[174],"The":[175,194],"been":[178],"trained":[179],"on":[180],"large-scale":[182],"Chinese":[183,190],"dataset":[184],"comprehensively":[186],"evaluated":[187],"using":[188],"both":[189],"English":[192],"datasets.":[193],"experimental":[195],"results":[196],"demonstrate":[197],"proposed":[200],"achieves":[202],"high":[203],"accuracy,":[205],"consistent":[206],"synchronization,":[208],"efficient":[210],"inference,":[212],"highlighting":[213],"strong":[215],"cross-lingual":[216],"generalization":[217],"capability.":[218]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
