{"id":"https://openalex.org/W3118160031","doi":"https://doi.org/10.1155/2020/6629634","title":"Realistic Speech-Driven Talking Video Generation with Personalized Pose","display_name":"Realistic Speech-Driven Talking Video Generation with Personalized Pose","publication_year":2020,"publication_date":"2020-12-28","ids":{"openalex":"https://openalex.org/W3118160031","doi":"https://doi.org/10.1155/2020/6629634","mag":"3118160031"},"language":"en","primary_location":{"id":"doi:10.1155/2020/6629634","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2020/6629634","pdf_url":"https://downloads.hindawi.com/journals/complexity/2020/6629634.pdf","source":{"id":"https://openalex.org/S207319443","display_name":"Complexity","issn_l":"1076-2787","issn":["1076-2787","1099-0526"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319869","host_organization_name":"Hindawi Publishing Corporation","host_organization_lineage":["https://openalex.org/P4310319869"],"host_organization_lineage_names":["Hindawi Publishing Corporation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complexity","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://downloads.hindawi.com/journals/complexity/2020/6629634.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100437310","display_name":"Xu Zhang","orcid":"https://orcid.org/0000-0003-3632-4464"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Zhang","raw_affiliation_strings":["Jiangsu Key Laboratory of Big Data Analysis Technology, Nanjing University of Information Science and Technology, Nanjing 210044, China"],"affiliations":[{"raw_affiliation_string":"Jiangsu Key Laboratory of Big Data Analysis Technology, Nanjing University of Information Science and Technology, Nanjing 210044, China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054915799","display_name":"Liguo Weng","orcid":"https://orcid.org/0000-0001-8281-5323"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liguo Weng","raw_affiliation_strings":["Jiangsu Key Laboratory of Big Data Analysis Technology, Nanjing University of Information Science and Technology, Nanjing 210044, China"],"affiliations":[{"raw_affiliation_string":"Jiangsu Key Laboratory of Big Data Analysis Technology, Nanjing University of Information Science and Technology, Nanjing 210044, China","institution_ids":["https://openalex.org/I200845125"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5054915799"],"corresponding_institution_ids":["https://openalex.org/I200845125"],"apc_list":{"value":2300,"currency":"USD","value_usd":2300},"apc_paid":{"value":2300,"currency":"USD","value_usd":2300},"fwci":0.2975,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.58504338,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"2020","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8716307878494263},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.602302610874176},{"id":"https://openalex.org/keywords/key-frame","display_name":"Key frame","score":0.5618917346000671},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5274482369422913},{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.47948703169822693},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47776615619659424},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.4613715410232544},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.42116421461105347},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.4142789840698242},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.39692050218582153},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.3677067756652832},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.13701876997947693},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.09172767400741577}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8716307878494263},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.602302610874176},{"id":"https://openalex.org/C2780139006","wikidata":"https://www.wikidata.org/wiki/Q1493902","display_name":"Key frame","level":3,"score":0.5618917346000671},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5274482369422913},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.47948703169822693},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47776615619659424},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4613715410232544},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.42116421461105347},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.4142789840698242},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.39692050218582153},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3677067756652832},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.13701876997947693},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.09172767400741577},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1155/2020/6629634","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2020/6629634","pdf_url":"https://downloads.hindawi.com/journals/complexity/2020/6629634.pdf","source":{"id":"https://openalex.org/S207319443","display_name":"Complexity","issn_l":"1076-2787","issn":["1076-2787","1099-0526"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319869","host_organization_name":"Hindawi Publishing Corporation","host_organization_lineage":["https://openalex.org/P4310319869"],"host_organization_lineage_names":["Hindawi Publishing Corporation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complexity","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:hin:complx:6629634","is_oa":false,"landing_page_url":"http://downloads.hindawi.com/journals/8503/2020/6629634.xml","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:15bae4e5a72a4a18ad6a1dbdcd8ba557","is_oa":true,"landing_page_url":"https://doaj.org/article/15bae4e5a72a4a18ad6a1dbdcd8ba557","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complexity, Vol 2020 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1155/2020/6629634","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2020/6629634","pdf_url":"https://downloads.hindawi.com/journals/complexity/2020/6629634.pdf","source":{"id":"https://openalex.org/S207319443","display_name":"Complexity","issn_l":"1076-2787","issn":["1076-2787","1099-0526"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319869","host_organization_name":"Hindawi Publishing Corporation","host_organization_lineage":["https://openalex.org/P4310319869"],"host_organization_lineage_names":["Hindawi Publishing Corporation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complexity","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7717822188","display_name":null,"funder_award_id":"42075130","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3118160031.pdf","grobid_xml":"https://content.openalex.org/works/W3118160031.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W1539720309","https://openalex.org/W2004789217","https://openalex.org/W2105055531","https://openalex.org/W2128173845","https://openalex.org/W2157331557","https://openalex.org/W2194775991","https://openalex.org/W2196707239","https://openalex.org/W2296650210","https://openalex.org/W2331128040","https://openalex.org/W2738406145","https://openalex.org/W2739192055","https://openalex.org/W2745771616","https://openalex.org/W2784435047","https://openalex.org/W2790933182","https://openalex.org/W2795109282","https://openalex.org/W2795230330","https://openalex.org/W2796438033","https://openalex.org/W2804619907","https://openalex.org/W2895226286","https://openalex.org/W2949662773","https://openalex.org/W2962730651","https://openalex.org/W2962795401","https://openalex.org/W2963076818","https://openalex.org/W2963091184","https://openalex.org/W2963163009","https://openalex.org/W2963389355","https://openalex.org/W2963418739","https://openalex.org/W2963522749","https://openalex.org/W2963864522","https://openalex.org/W2964243274","https://openalex.org/W2964304707","https://openalex.org/W2970006822","https://openalex.org/W2984529706","https://openalex.org/W2995238198","https://openalex.org/W3090412837","https://openalex.org/W3097792222","https://openalex.org/W3109875893","https://openalex.org/W4245773085","https://openalex.org/W4249433378","https://openalex.org/W4295731579","https://openalex.org/W6692078629","https://openalex.org/W6697376795","https://openalex.org/W6753914649","https://openalex.org/W6755083214"],"related_works":["https://openalex.org/W2066003895","https://openalex.org/W2537963312","https://openalex.org/W2537762514","https://openalex.org/W2349788282","https://openalex.org/W577271088","https://openalex.org/W2120801881","https://openalex.org/W1982853263","https://openalex.org/W855007925","https://openalex.org/W2618671746","https://openalex.org/W2898107007"],"abstract_inverted_index":{"In":[0,179,228,269],"this":[1,78,85],"work,":[2],"we":[3,122,238,279],"propose":[4],"a":[5,9,14,40,115,136,149,171,183,240],"method":[6,20,82,297,334],"to":[7,61,169,230,258,271],"transform":[8],"speaker\u2019s":[10,96],"speech":[11],"information":[12],"into":[13,187,244],"target":[15],"character\u2019s":[16],"talking":[17],"video;":[18],"the":[19,23,34,64,89,95,102,124,130,188,197,203,211,218,224,245,250,259,263,273,276,281,326],"could":[21,298],"make":[22],"mouth":[24,46],"shape":[25,47],"synchronization,":[26],"expression,":[27],"and":[28,48,63,98,108,145,154,156,163,176,208,233,254,286,307,311,329],"body":[29,99],"posture":[30,49],"more":[31,150,206],"realistic":[32,207,232],"in":[33,69,84,105,325],"synthesized":[35,189,198],"speaker":[36,74,127,199,213,305],"video.":[37],"This":[38],"is":[39,59,67,215],"challenging":[41],"task":[42],"because":[43],"changes":[44],"of":[45,91,94,117,139,196,249,262,275,318],"are":[50,200,314],"coupled":[51],"with":[52,322],"audio":[53,103,140,143,301],"semantic":[54],"information.":[55],"The":[56,81],"model":[57,65],"training":[58],"difficult":[60],"converge,":[62],"effect":[66,205],"unstable":[68],"complex":[70],"scenes.":[71],"Existing":[72],"speech-driven":[73],"methods":[75,324],"cannot":[76],"solve":[77],"problem":[79],"well.":[80],"proposed":[83,277],"paper":[86],"first":[87],"generates":[88],"sequence":[90,175,192,253],"key":[92,112,221,246],"points":[93,113,222],"face":[97],"postures":[100,313],"from":[101],"signal":[104],"real":[106,126],"time":[107],"then":[109],"visualizes":[110],"these":[111],"as":[114,168],"series":[116],"two-dimensional":[118],"skeleton":[119],"images.":[120],"Subsequently,":[121],"generate":[123,231,303],"final":[125,212],"video":[128,131,214,225],"through":[129,223,265],"generation":[132,226],"network.":[133,227],"We":[134],"take":[135],"random":[137],"sampling":[138],"clips,":[141],"encode":[142],"contents":[144],"temporal":[146],"correlations":[147],"using":[148,160],"effective":[151],"network":[152,158,248],"structure,":[153],"optimize":[155],"iterate":[157],"outputs":[159],"differential":[161],"loss":[162],"attitude":[164],"perception":[165],"loss,":[166],"so":[167],"obtain":[170],"smoother":[172],"pose":[173,191,235,252],"key-point":[174],"better":[177,315,336],"performance.":[178],"addition,":[180],"by":[181,217],"inserting":[182],"specified":[184],"action":[185,194],"frame":[186],"human":[190],"window,":[193],"poses":[195],"enriched,":[201],"making":[202],"synthesis":[204],"natural.":[209],"Then,":[210],"generated":[216,251],"obtained":[219],"gesture":[220],"order":[229,270],"high-resolution":[234],"detail":[236],"videos,":[237,306],"insert":[239],"local":[241,260],"attention":[242,257],"mechanism":[243],"point":[247],"give":[255],"higher":[256],"details":[261],"characters":[264],"spatial":[266],"weight":[267],"masks.":[268],"verify":[272],"effectiveness":[274],"method,":[278],"used":[280],"objective":[282],"evaluation":[283,289],"index":[284,328],"NME":[285,327],"user":[287,330],"subjective":[288,331],"methods,":[290],"respectively.":[291],"Experiment":[292],"results":[293],"showed":[294,335],"that":[295],"our":[296,333],"vividly":[299],"use":[300],"contentsto":[302],"corresponding":[304],"its":[308],"lip-matching":[309],"accuracy":[310],"expression":[312],"than":[316],"those":[317],"previous":[319],"work.":[320],"Compared":[321],"existing":[323],"evaluation,":[332],"results.":[337]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
