{"id":"https://openalex.org/W4415540380","doi":"https://doi.org/10.1145/3746027.3755140","title":"Contextual Gesture: Co-Speech Gesture Video Generation through Context-aware Gesture Representation","display_name":"Contextual Gesture: Co-Speech Gesture Video Generation through Context-aware Gesture Representation","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415540380","doi":"https://doi.org/10.1145/3746027.3755140"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755140","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755140","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746027.3755140","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017467599","display_name":"Pinxin Liu","orcid":"https://orcid.org/0009-0009-6538-7174"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Pinxin Liu","raw_affiliation_strings":["University of Rochester, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"University of Rochester, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104273123","display_name":"Pengfei Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pengfei Zhang","raw_affiliation_strings":["University of California, Irvine, Irvine, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Irvine, Irvine, CA, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101717515","display_name":"Hyeongwoo Kim","orcid":"https://orcid.org/0000-0002-2509-8230"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hyeongwoo Kim","raw_affiliation_strings":["Imperial College London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Imperial College London, London, United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031673061","display_name":"Pablo Garrido","orcid":"https://orcid.org/0009-0001-8273-6737"},"institutions":[{"id":"https://openalex.org/I4210129384","display_name":"Aadi (United States)","ror":"https://ror.org/03880e418","country_code":"US","type":"company","lineage":["https://openalex.org/I4210129384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pablo Garrido","raw_affiliation_strings":["Flawless AI, Santa Monica, CA, USA"],"affiliations":[{"raw_affiliation_string":"Flawless AI, Santa Monica, CA, USA","institution_ids":["https://openalex.org/I4210129384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062006413","display_name":"Ari Shapiro","orcid":"https://orcid.org/0000-0001-8626-4119"},"institutions":[{"id":"https://openalex.org/I4210147804","display_name":"IntraMedical Imaging (United States)","ror":"https://ror.org/05qvwqt73","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147804"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ari Shapiro","raw_affiliation_strings":["FlawlessAI, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"FlawlessAI, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I4210147804"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108705955","display_name":"Kyle Olszewski","orcid":"https://orcid.org/0000-0001-8775-6879"},"institutions":[{"id":"https://openalex.org/I4210129384","display_name":"Aadi (United States)","ror":"https://ror.org/03880e418","country_code":"US","type":"company","lineage":["https://openalex.org/I4210129384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kyle Olszewski","raw_affiliation_strings":["Flawless AI, Santa Monica, CA, USA"],"affiliations":[{"raw_affiliation_string":"Flawless AI, Santa Monica, CA, USA","institution_ids":["https://openalex.org/I4210129384"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5017467599"],"corresponding_institution_ids":["https://openalex.org/I5388228"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38838324,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"9803","last_page":"9812"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.9502999782562256},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5817999839782715},{"id":"https://openalex.org/keywords/gesture-recognition","display_name":"Gesture recognition","score":0.5533000230789185},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5271999835968018},{"id":"https://openalex.org/keywords/synchronizing","display_name":"Synchronizing","score":0.5248000025749207},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.37400001287460327}],"concepts":[{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.9502999782562256},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7967000007629395},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5817999839782715},{"id":"https://openalex.org/C159437735","wikidata":"https://www.wikidata.org/wiki/Q1519524","display_name":"Gesture recognition","level":3,"score":0.5533000230789185},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5271999835968018},{"id":"https://openalex.org/C162932704","wikidata":"https://www.wikidata.org/wiki/Q1058791","display_name":"Synchronizing","level":3,"score":0.5248000025749207},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4975000023841858},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4065000116825104},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3743000030517578},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.37400001287460327},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33000001311302185},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.3257000148296356},{"id":"https://openalex.org/C62402345","wikidata":"https://www.wikidata.org/wiki/Q18921192","display_name":"Interaction technique","level":3,"score":0.3183000087738037},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.29600000381469727},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.2913999855518341},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26669999957084656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755140","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755140","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3746027.3755140","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755140","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2191779130","https://openalex.org/W2331128040","https://openalex.org/W2603777577","https://openalex.org/W2962795401","https://openalex.org/W2962974533","https://openalex.org/W2984529706","https://openalex.org/W3098994456","https://openalex.org/W3178284600","https://openalex.org/W3196434877","https://openalex.org/W3204221554","https://openalex.org/W3205994442","https://openalex.org/W3209984917","https://openalex.org/W4303448003","https://openalex.org/W4304080460","https://openalex.org/W4312473638","https://openalex.org/W4312674262","https://openalex.org/W4386076250","https://openalex.org/W4386655536","https://openalex.org/W4390872636","https://openalex.org/W4392910795","https://openalex.org/W4402703119","https://openalex.org/W4402727178","https://openalex.org/W4402754111","https://openalex.org/W4404966542","https://openalex.org/W4413144375","https://openalex.org/W4413145441"],"related_works":[],"abstract_inverted_index":{"Co-speech":[0],"gesture":[1,37,56,76,100,119,128],"generation":[2,58,125],"is":[3],"crucial":[4],"for":[5,34],"creating":[6],"lifelike":[7],"avatars":[8],"and":[9,39,88,117,126],"enhancing":[10],"human-computer":[11],"interactions":[12],"by":[13],"synchronizing":[14],"gestures":[15],"with":[16,24],"speech.":[17],"Despite":[18],"recent":[19],"advancements,":[20],"existing":[21],"methods":[22],"struggle":[23],"accurately":[25],"identifying":[26],"the":[27],"rhythmic":[28],"or":[29],"semantic":[30],"triggers":[31],"from":[32],"audio":[33],"generating":[35],"contextualized":[36,75],"patterns":[38],"achieving":[40],"pixel-level":[41],"realism.":[42],"To":[43],"address":[44],"these":[45],"challenges,":[46],"we":[47],"introduce":[48],"Contextual":[49,111],"Gesture,":[50],"a":[51,64,74,90],"framework":[52],"that":[53,68,78,94,110],"improves":[54],"co-speech":[55],"video":[57,104,127],"through":[59,86],"three":[60],"innovative":[61],"components:":[62],"(1)":[63],"chronological":[65],"speech-gesture":[66],"alignment":[67],"temporally":[69],"connects":[70],"two":[71],"modalities,":[72],"(2)":[73],"tokenization":[77],"incorporate":[79],"speech":[80],"context":[81],"into":[82],"motion":[83],"pattern":[84],"representation":[85],"distillation,":[87],"(3)":[89],"structure-aware":[91],"refinement":[92],"module":[93],"employs":[95],"edge":[96],"connection":[97],"to":[98,102],"link":[99],"keypoints":[101],"improve":[103],"generation.":[105],"Our":[106],"extensive":[107],"experiments":[108],"demonstrate":[109],"Gesture":[112],"not":[113],"only":[114],"produces":[115],"realistic":[116],"speech-aligned":[118],"videos":[120],"but":[121],"also":[122],"supports":[123],"long-sequence":[124],"editing":[129],"applications,":[130],"shown":[131],"in":[132],"Fig.1":[133]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-25T00:00:00"}
