{"id":"https://openalex.org/W3187364420","doi":"https://doi.org/10.24963/ijcai.2021/152","title":"Audio2Head: Audio-driven One-shot Talking-head Generation with Natural Head Motion","display_name":"Audio2Head: Audio-driven One-shot Talking-head Generation with Natural Head Motion","publication_year":2021,"publication_date":"2021-08-01","ids":{"openalex":"https://openalex.org/W3187364420","doi":"https://doi.org/10.24963/ijcai.2021/152","mag":"3187364420"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2021/152","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/152","pdf_url":"https://www.ijcai.org/proceedings/2021/0152.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2021/0152.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101908213","display_name":"Suzhen Wang","orcid":"https://orcid.org/0000-0001-7271-4481"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Suzhen Wang","raw_affiliation_strings":["Virtual Human Group, Netease Fuxi AI Lab, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Virtual Human Group, Netease Fuxi AI Lab, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019036797","display_name":"Lincheng Li","orcid":"https://orcid.org/0000-0002-6047-0472"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lincheng Li","raw_affiliation_strings":["Virtual Human Group, Netease Fuxi AI Lab, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Virtual Human Group, Netease Fuxi AI Lab, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101853531","display_name":"Yu Ding","orcid":"https://orcid.org/0000-0003-1834-4429"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Ding","raw_affiliation_strings":["Virtual Human Group, Netease Fuxi AI Lab, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Virtual Human Group, Netease Fuxi AI Lab, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022008180","display_name":"Changjie Fan","orcid":"https://orcid.org/0000-0001-5420-0516"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changjie Fan","raw_affiliation_strings":["Virtual Human Group, Netease Fuxi AI Lab, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Virtual Human Group, Netease Fuxi AI Lab, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003076238","display_name":"Xin Yu","orcid":"https://orcid.org/0000-0002-0269-5649"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xin Yu","raw_affiliation_strings":["University of Technology Sydney"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Technology Sydney","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101908213"],"corresponding_institution_ids":["https://openalex.org/I4210091137"],"apc_list":null,"apc_paid":null,"fwci":9.601,"has_fulltext":false,"cited_by_count":141,"citation_normalized_percentile":{"value":0.98686249,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1098","last_page":"1105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9865000247955322,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7986438870429993},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7348384857177734},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.7168715596199036},{"id":"https://openalex.org/keywords/head","display_name":"Head (geology)","score":0.6220959424972534},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.605355978012085},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4356438219547272},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4339749217033386},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.43292033672332764},{"id":"https://openalex.org/keywords/motion-field","display_name":"Motion field","score":0.4121500253677368},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3326941728591919},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11479905247688293}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7986438870429993},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7348384857177734},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7168715596199036},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.6220959424972534},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.605355978012085},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4356438219547272},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4339749217033386},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.43292033672332764},{"id":"https://openalex.org/C124774092","wikidata":"https://www.wikidata.org/wiki/Q6917782","display_name":"Motion field","level":3,"score":0.4121500253677368},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3326941728591919},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11479905247688293},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C114793014","wikidata":"https://www.wikidata.org/wiki/Q52109","display_name":"Geomorphology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2021/152","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/152","pdf_url":"https://www.ijcai.org/proceedings/2021/0152.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2021/152","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/152","pdf_url":"https://www.ijcai.org/proceedings/2021/0152.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3187364420.pdf","grobid_xml":"https://content.openalex.org/works/W3187364420.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W56717636","https://openalex.org/W2015143272","https://openalex.org/W2133665775","https://openalex.org/W2726515241","https://openalex.org/W2747308422","https://openalex.org/W2790630725","https://openalex.org/W2807126412","https://openalex.org/W2895542678","https://openalex.org/W2943314296","https://openalex.org/W2944294033","https://openalex.org/W2946137858","https://openalex.org/W2960274051","https://openalex.org/W2963168844","https://openalex.org/W2963290645","https://openalex.org/W2963917969","https://openalex.org/W2964559396","https://openalex.org/W2979894294","https://openalex.org/W2984700035","https://openalex.org/W2998605827","https://openalex.org/W3008823916","https://openalex.org/W3034211329","https://openalex.org/W3034241236","https://openalex.org/W3035201761","https://openalex.org/W3081492798","https://openalex.org/W3097792222","https://openalex.org/W3107666850","https://openalex.org/W3109114891","https://openalex.org/W3176721746","https://openalex.org/W3197199219","https://openalex.org/W4288088427"],"related_works":["https://openalex.org/W4286646204","https://openalex.org/W1978569796","https://openalex.org/W2058759268","https://openalex.org/W2169823899","https://openalex.org/W2770661158","https://openalex.org/W2805267542","https://openalex.org/W2077412630","https://openalex.org/W2076226357","https://openalex.org/W2312240660","https://openalex.org/W2051121715"],"abstract_inverted_index":{"We":[0,51],"propose":[1],"an":[2,170],"audio-driven":[3],"talking-head":[4,9,179],"method":[5,156,198],"to":[6,93,125,176],"generate":[7],"photo-realistic":[8,178],"videos":[10,180,200],"from":[11,107,131,181],"a":[12,39,42,54,65,85,111,121,137],"single":[13],"reference":[14,138,191],"image.":[15,139,192],"In":[16,71],"this":[17,72,141],"work,":[18],"we":[19,109,119],"tackle":[20],"two":[21],"key":[22],"challenges:":[23],"(i)":[24],"producing":[25],"natural":[26],"head":[27,44,55,62,76,134,203],"motions":[28,105,147],"that":[29,196],"match":[30],"speech":[31],"prosody,":[32],"and":[33,136,152,162,188,208,211],"(ii)}":[34],"maintaining":[35],"the":[36,48,74,80,102,127,146,160,166,182,189,213],"appearance":[37],"of":[38,84,148,165],"speaker":[40],"in":[41],"large":[43],"motion":[45,115,122,129,186],"while":[46],"stabilizing":[47],"non-face":[49],"regions.":[50],"first":[52],"design":[53],"pose":[56],"predictor":[57],"by":[58],"modeling":[59],"rigid":[60],"6D":[61],"movements":[63,83],"with":[64,201],"motion-aware":[66],"recurrent":[67],"neural":[68],"network":[69,92,173],"(RNN).":[70],"way,":[73],"predicted":[75],"poses":[77],"act":[78],"as":[79],"low-frequency":[81],"holistic":[82],"talking":[86],"head,":[87,151],"thus":[88],"allowing":[89],"our":[90,155,197],"latter":[91],"focus":[94],"on":[95],"detailed":[96],"facial":[97,149,206],"movement":[98],"generation.":[99],"To":[100],"depict":[101],"entire":[103],"image":[104,171],"arising":[106],"audio,":[108,133],"exploit":[110],"keypoint":[112,142,184],"based":[113,143,185],"dense":[114,128],"field":[116,123],"representation.":[117],"Then,":[118],"develop":[120],"generator":[124],"produce":[126],"fields":[130,187],"input":[132,190],"poses,":[135],"As":[140],"representation":[144],"models":[145],"regions,":[150],"backgrounds":[153,210],"integrally,":[154],"can":[157],"better":[158],"constrain":[159],"spatial":[161],"temporal":[163],"consistency":[164],"generated":[167],"videos.":[168],"Finally,":[169],"generation":[172],"is":[174],"employed":[175],"render":[177],"estimated":[183],"Extensive":[193],"experiments":[194],"demonstrate":[195],"produces":[199],"plausible":[202],"motions,":[204],"synchronized":[205],"expressions,":[207],"stable":[209],"outperforms":[212],"state-of-the-art.":[214]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":35},{"year":2024,"cited_by_count":45},{"year":2023,"cited_by_count":38},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":4}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
