{"id":"https://openalex.org/W4400275358","doi":"https://doi.org/10.1109/lsp.2024.3422816","title":"MergeTalk: Audio-Driven Talking Head Generation From Single Image With Feature Merge","display_name":"MergeTalk: Audio-Driven Talking Head Generation From Single Image With Feature Merge","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4400275358","doi":"https://doi.org/10.1109/lsp.2024.3422816"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2024.3422816","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2024.3422816","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022177133","display_name":"Jian Gao","orcid":"https://orcid.org/0009-0000-9445-1918"},"institutions":[{"id":"https://openalex.org/I32574673","display_name":"Shantou University","ror":"https://ror.org/01a099706","country_code":"CN","type":"education","lineage":["https://openalex.org/I32574673"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jian Gao","raw_affiliation_strings":["Department of Mechanical Engineering, Shantou University, Shantou, China"],"raw_orcid":"https://orcid.org/0009-0000-9445-1918","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, Shantou University, Shantou, China","institution_ids":["https://openalex.org/I32574673"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101809228","display_name":"Chang Shu","orcid":"https://orcid.org/0000-0001-8719-3690"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Shu","raw_affiliation_strings":["Ping An Technology Company, Ltd., Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-8719-3690","affiliations":[{"raw_affiliation_string":"Ping An Technology Company, Ltd., Shenzhen, China","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114186556","display_name":"Ximin Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ximin Zheng","raw_affiliation_strings":["Ping An Technology Company, Ltd., Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ping An Technology Company, Ltd., Shenzhen, China","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110325683","display_name":"Zheng L\u00fc","orcid":"https://orcid.org/0009-0002-8784-3168"},"institutions":[{"id":"https://openalex.org/I13591777","display_name":"University of Nottingham Ningbo China","ror":"https://ror.org/03y4dt428","country_code":"CN","type":"education","lineage":["https://openalex.org/I13591777","https://openalex.org/I142263535"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Lu","raw_affiliation_strings":["University of Nottingham Ningbo China, Ningbo, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Nottingham Ningbo China, Ningbo, China","institution_ids":["https://openalex.org/I13591777"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103875359","display_name":"Nengsheng Bao","orcid":null},"institutions":[{"id":"https://openalex.org/I32574673","display_name":"Shantou University","ror":"https://ror.org/01a099706","country_code":"CN","type":"education","lineage":["https://openalex.org/I32574673"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nengsheng Bao","raw_affiliation_strings":["Department of Mechanical Engineering, Shantou University, Shantou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, Shantou University, Shantou, China","institution_ids":["https://openalex.org/I32574673"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5022177133"],"corresponding_institution_ids":["https://openalex.org/I32574673"],"apc_list":null,"apc_paid":null,"fwci":0.9523,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.75524902,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"31","issue":null,"first_page":"1850","last_page":"1854"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8594838380813599},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6830403208732605},{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.6073158979415894},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6040613651275635},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.5288171768188477},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.41821372509002686},{"id":"https://openalex.org/keywords/facial-recognition-system","display_name":"Facial recognition system","score":0.4127959609031677},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.40678903460502625},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4056513011455536},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3521212935447693}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8594838380813599},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6830403208732605},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.6073158979415894},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6040613651275635},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.5288171768188477},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.41821372509002686},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.4127959609031677},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.40678903460502625},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4056513011455536},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3521212935447693},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2024.3422816","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2024.3422816","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320324778","display_name":"Ningbo Municipal Bureau of Science and Technology","ror":"https://ror.org/00gskyj95"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1686810756","https://openalex.org/W1924770834","https://openalex.org/W2194775991","https://openalex.org/W2594690981","https://openalex.org/W2808631503","https://openalex.org/W2944294033","https://openalex.org/W2962785568","https://openalex.org/W2963073614","https://openalex.org/W2964449965","https://openalex.org/W2969985801","https://openalex.org/W3019952993","https://openalex.org/W3035574324","https://openalex.org/W3081492798","https://openalex.org/W3101631197","https://openalex.org/W3109114891","https://openalex.org/W3176913662","https://openalex.org/W3186090335","https://openalex.org/W3187364420","https://openalex.org/W4200631136","https://openalex.org/W4310379947","https://openalex.org/W4317796973","https://openalex.org/W4385245566","https://openalex.org/W4386066256","https://openalex.org/W4386072021","https://openalex.org/W4391019513","https://openalex.org/W4393248189","https://openalex.org/W6631190155","https://openalex.org/W6637373629","https://openalex.org/W6640212811","https://openalex.org/W6810221379","https://openalex.org/W6862939797"],"related_works":["https://openalex.org/W3000097931","https://openalex.org/W2354322770","https://openalex.org/W4237547500","https://openalex.org/W1570848052","https://openalex.org/W2373192430","https://openalex.org/W4239268388","https://openalex.org/W4243305035","https://openalex.org/W1537496349","https://openalex.org/W3211385606","https://openalex.org/W2985118265"],"abstract_inverted_index":{"Audio-driven":[0],"talking":[1,90],"head":[2,20,80,91,128],"generation":[3,59],"has":[4,159],"wide":[5],"real":[6],"world":[7],"applications":[8],"but":[9],"remains":[10],"challenging":[11],"due":[12],"to":[13,61,72,108],"the":[14,46,68,74,88,113,120,124,131,137,152],"problems":[15],"such":[16,93],"as":[17,49,94],"audio-lip":[18,75],"synchronization,":[19],"poses,":[21],"identity":[22,84,115],"preservation,":[23],"video":[24],"quality,":[25],"etc.":[26],"We":[27,52],"propose":[28,101],"a":[29,41,102],"novel":[30,103],"two-stage":[31],"framework":[32,150],"that":[33,148],"uses":[34],"explicit":[35],"3D":[36,42,56,132],"face":[37,104,133],"images":[38],"rendered":[39],"from":[40,119,130],"model":[43,71],"based":[44,140],"on":[45,141],"audio":[47],"input,":[48],"intermediate":[50],"features.":[51],"devise":[53],"two":[54],"independent":[55],"motion":[57],"parameter":[58],"networks":[60],"generate":[62,136],"expression":[63],"and":[64,78,98,111,123,127,135,158],"pose":[65],"parameters":[66],"for":[67],"popular":[69],"3DMM":[70],"solve":[73],"synchronization":[76],"problem":[77],"natural":[79],"poses":[81,129],"without":[82],"losing":[83],"information.":[85],"To":[86],"improve":[87],"final":[89,138],"quality":[92],"avoiding":[95],"facial":[96,117],"distortion":[97],"artifacts,":[99],"we":[100],"feature":[105],"merge":[106],"network":[107],"accurately":[109],"extract":[110],"fuse":[112],"background,":[114],"information,":[116],"texture":[118],"source":[121],"image,":[122],"lip":[125],"movements":[126],"images,":[134],"videos":[139],"generative":[142],"adversarial":[143],"networks.":[144],"Extensive":[145],"experiments":[146],"show":[147],"our":[149],"outperforms":[151],"SOTA":[153],"methods":[154],"in":[155],"several":[156],"aspects":[157],"good":[160],"generalization":[161],"ability.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
