{"id":"https://openalex.org/W4385764101","doi":"https://doi.org/10.24963/ijcai.2023/650","title":"DiffuseStyleGesture: Stylized Audio-Driven Co-Speech Gesture Generation with Diffusion Models","display_name":"DiffuseStyleGesture: Stylized Audio-Driven Co-Speech Gesture Generation with Diffusion Models","publication_year":2023,"publication_date":"2023-08-01","ids":{"openalex":"https://openalex.org/W4385764101","doi":"https://doi.org/10.24963/ijcai.2023/650"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2023/650","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/650","pdf_url":"https://www.ijcai.org/proceedings/2023/0650.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2023/0650.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113092484","display_name":"Sicheng Yang","orcid":"https://orcid.org/0000-0002-0928-034X"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sicheng Yang","raw_affiliation_strings":["Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100667025","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0002-6527-5502"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100780135","display_name":"Minglei Li","orcid":"https://orcid.org/0000-0002-1427-3507"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minglei Li","raw_affiliation_strings":["Huawei Cloud Computing Technologies Co., Ltd, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Cloud Computing Technologies Co., Ltd, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043901952","display_name":"Zhensong Zhang","orcid":"https://orcid.org/0009-0001-7911-7564"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhensong Zhang","raw_affiliation_strings":["Huawei Noah\u2019s Ark Lab, Shenzhen, China","Huawei Noah's Ark Lab, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah\u2019s Ark Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]},{"raw_affiliation_string":"Huawei Noah's Ark Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100605733","display_name":"Hao Lei","orcid":"https://orcid.org/0009-0009-6977-119X"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Hao","raw_affiliation_strings":["Huawei Noah\u2019s Ark Lab, Shenzhen, China","Huawei Noah's Ark Lab, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah\u2019s Ark Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]},{"raw_affiliation_string":"Huawei Noah's Ark Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036821828","display_name":"Weihong Bao","orcid":"https://orcid.org/0009-0008-4696-854X"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihong Bao","raw_affiliation_strings":["Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110689941","display_name":"Ming Cheng","orcid":"https://orcid.org/0009-0003-3643-1726"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Cheng","raw_affiliation_strings":["Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006373750","display_name":"Long Xiao","orcid":"https://orcid.org/0000-0002-5271-8554"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Xiao","raw_affiliation_strings":["Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5113092484"],"corresponding_institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":13.6283,"has_fulltext":false,"cited_by_count":78,"citation_normalized_percentile":{"value":0.99337336,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5860","last_page":"5868"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.9110323190689087},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.787376880645752},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6894454956054688},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.449769526720047},{"id":"https://openalex.org/keywords/gesture-recognition","display_name":"Gesture recognition","score":0.4353329539299011},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3692304491996765}],"concepts":[{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.9110323190689087},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.787376880645752},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6894454956054688},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.449769526720047},{"id":"https://openalex.org/C159437735","wikidata":"https://www.wikidata.org/wiki/Q1519524","display_name":"Gesture recognition","level":3,"score":0.4353329539299011},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3692304491996765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2023/650","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/650","pdf_url":"https://www.ijcai.org/proceedings/2023/0650.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2023/650","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/650","pdf_url":"https://www.ijcai.org/proceedings/2023/0650.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5699999928474426,"id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G2603945996","display_name":null,"funder_award_id":"62076144","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4385764101.pdf"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W2962795401","https://openalex.org/W2967443589","https://openalex.org/W3009042479","https://openalex.org/W3015468748","https://openalex.org/W3034561418","https://openalex.org/W3036167779","https://openalex.org/W3083173864","https://openalex.org/W3115266783","https://openalex.org/W3119219688","https://openalex.org/W3131922516","https://openalex.org/W3153551559","https://openalex.org/W3198131199","https://openalex.org/W3209984917","https://openalex.org/W4224035735","https://openalex.org/W4230429791","https://openalex.org/W4249736682","https://openalex.org/W4285981714","https://openalex.org/W4288099666","https://openalex.org/W4292719004","https://openalex.org/W4292945985","https://openalex.org/W4293352083","https://openalex.org/W4294530172","https://openalex.org/W4295838474","https://openalex.org/W4298186886","https://openalex.org/W4308222716","https://openalex.org/W4308222758","https://openalex.org/W4310832606","https://openalex.org/W4312671789","https://openalex.org/W4312674262","https://openalex.org/W4312719027","https://openalex.org/W4320085220","https://openalex.org/W4321351659","https://openalex.org/W4323240786","https://openalex.org/W4364377334","https://openalex.org/W4372341521","https://openalex.org/W4377010269","https://openalex.org/W4378083152","https://openalex.org/W4382203549","https://openalex.org/W4385245566","https://openalex.org/W4386075888","https://openalex.org/W4386075984","https://openalex.org/W4386076405"],"related_works":["https://openalex.org/W2066003895","https://openalex.org/W2902873204","https://openalex.org/W2185750513","https://openalex.org/W4312416068","https://openalex.org/W3147379364","https://openalex.org/W2010878661","https://openalex.org/W2026258298","https://openalex.org/W3204639664","https://openalex.org/W2970836791","https://openalex.org/W2805039731"],"abstract_inverted_index":{"The":[0,9],"art":[1],"of":[2,29,34,40,75,120],"communication":[3],"beyond":[4],"speech":[5,93],"there":[6],"are":[7,148],"gestures.":[8,97],"automatic":[10],"co-speech":[11,69],"gesture":[12,42,59,87,109,140],"generation":[13,60],"draws":[14],"much":[15],"attention":[16,82],"in":[17],"computer":[18],"animation.":[19],"It":[20,62],"is":[21],"a":[22,54],"challenging":[23],"task":[24],"due":[25],"to":[26,43,85,90,106],"the":[27,32,36,41,44,86,108,118],"diversity":[28,119],"gestures":[30,70,122,126],"and":[31,38,67,83,95,127,146],"difficulty":[33],"matching":[35],"rhythm":[37],"semantics":[39],"corresponding":[45],"speech.":[46],"To":[47],"address":[48],"these":[49],"problems,":[50],"we":[51,79,116],"present":[52],"DiffuseStyleGesture,":[53],"diffusion":[55,88],"model":[56,102],"based":[57,71],"speech-driven":[58,139],"approach.":[61],"generates":[63],"high-quality,":[64],"speech-matched,":[65],"stylized,":[66],"diverse":[68],"on":[72,138],"given":[73],"speeches":[74],"arbitrary":[76],"length.":[77],"Specifically,":[78],"introduce":[80],"cross-local":[81],"self-attention":[84],"pipeline":[89],"generate":[91],"better":[92],"matched":[94],"realistic":[96],"We":[98],"then":[99],"train":[100],"our":[101,133],"with":[103,123],"classifier-free":[104],"guidance":[105],"control":[107],"style":[110],"by":[111],"interpolation":[112],"or":[113],"extrapolation.":[114],"Additionally,":[115],"improve":[117],"generated":[121],"different":[124],"initial":[125],"noise.":[128],"Extensive":[129],"experiments":[130],"show":[131],"that":[132],"method":[134],"outperforms":[135],"recent":[136],"approaches":[137],"generation.":[141],"Our":[142],"code,":[143],"pre-trained":[144],"models,":[145],"demos":[147],"available":[149],"at":[150],"https://github.com/YoungSeng/DiffuseStyleGesture.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":11},{"year":2025,"cited_by_count":30},{"year":2024,"cited_by_count":32},{"year":2023,"cited_by_count":5}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
