{"id":"https://openalex.org/W4372349426","doi":"https://doi.org/10.1109/icassp49357.2023.10095344","title":"MPE4G : Multimodal Pretrained Encoder for Co-Speech Gesture Generation","display_name":"MPE4G : Multimodal Pretrained Encoder for Co-Speech Gesture Generation","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372349426","doi":"https://doi.org/10.1109/icassp49357.2023.10095344"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095344","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095344","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002789888","display_name":"Gwantae Kim","orcid":"https://orcid.org/0000-0002-3239-1865"},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Gwantae Kim","raw_affiliation_strings":["Korea University,School of Electrical Engineering,Seoul,South Korea","School of Electrical Engineering, Korea University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,School of Electrical Engineering,Seoul,South Korea","institution_ids":["https://openalex.org/I197347611"]},{"raw_affiliation_string":"School of Electrical Engineering, Korea University, Seoul, South Korea","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066456474","display_name":"Seonghyeok Noh","orcid":null},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Seonghyeok Noh","raw_affiliation_strings":["Korea University,School of Electrical Engineering,Seoul,South Korea","School of Electrical Engineering, Korea University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,School of Electrical Engineering,Seoul,South Korea","institution_ids":["https://openalex.org/I197347611"]},{"raw_affiliation_string":"School of Electrical Engineering, Korea University, Seoul, South Korea","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003847195","display_name":"Insung Ham","orcid":null},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Insung Ham","raw_affiliation_strings":["Korea University,School of Electrical Engineering,Seoul,South Korea","School of Electrical Engineering, Korea University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,School of Electrical Engineering,Seoul,South Korea","institution_ids":["https://openalex.org/I197347611"]},{"raw_affiliation_string":"School of Electrical Engineering, Korea University, Seoul, South Korea","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026204977","display_name":"Hanseok Ko","orcid":"https://orcid.org/0000-0002-8744-4514"},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hanseok Ko","raw_affiliation_strings":["Korea University,School of Electrical Engineering,Seoul,South Korea","School of Electrical Engineering, Korea University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,School of Electrical Engineering,Seoul,South Korea","institution_ids":["https://openalex.org/I197347611"]},{"raw_affiliation_string":"School of Electrical Engineering, Korea University, Seoul, South Korea","institution_ids":["https://openalex.org/I197347611"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5002789888"],"corresponding_institution_ids":["https://openalex.org/I197347611"],"apc_list":null,"apc_paid":null,"fwci":1.6459,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.83757702,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.9376952648162842},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7668572664260864},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.7217203974723816},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6289995908737183},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5560288429260254},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.533132791519165},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49360740184783936},{"id":"https://openalex.org/keywords/gesture-recognition","display_name":"Gesture recognition","score":0.45097655057907104},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4236133396625519},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3345314860343933}],"concepts":[{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.9376952648162842},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7668572664260864},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.7217203974723816},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6289995908737183},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5560288429260254},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.533132791519165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49360740184783936},{"id":"https://openalex.org/C159437735","wikidata":"https://www.wikidata.org/wiki/Q1519524","display_name":"Gesture recognition","level":3,"score":0.45097655057907104},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4236133396625519},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3345314860343933},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095344","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095344","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5199999809265137,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1485951484","https://openalex.org/W1873695396","https://openalex.org/W2008208299","https://openalex.org/W2024536104","https://openalex.org/W2078937684","https://openalex.org/W2101032778","https://openalex.org/W2235920218","https://openalex.org/W2493916176","https://openalex.org/W2616222580","https://openalex.org/W2719034351","https://openalex.org/W2896457183","https://openalex.org/W2962711404","https://openalex.org/W2962730651","https://openalex.org/W2962795401","https://openalex.org/W2962896489","https://openalex.org/W2967443589","https://openalex.org/W2978956737","https://openalex.org/W2997510589","https://openalex.org/W3009385801","https://openalex.org/W3083173864","https://openalex.org/W3090839706","https://openalex.org/W3110095664","https://openalex.org/W3125775899","https://openalex.org/W3194872882","https://openalex.org/W3196782138","https://openalex.org/W4285308163","https://openalex.org/W4292945985","https://openalex.org/W4308238098","https://openalex.org/W4312674262","https://openalex.org/W4312719027","https://openalex.org/W4385245566","https://openalex.org/W6639538794","https://openalex.org/W6723250868","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6756633688","https://openalex.org/W6774717684","https://openalex.org/W6782187349"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W203959209","https://openalex.org/W2167701463","https://openalex.org/W2110287964","https://openalex.org/W4307407935"],"abstract_inverted_index":{"When":[0],"virtual":[1],"agents":[2],"interact":[3],"with":[4,13,58,76],"humans,":[5],"gestures":[6,45,90,102,122],"are":[7,34,129,137],"crucial":[8],"to":[9,27,79,97,103],"delivering":[10],"their":[11],"intentions":[12],"speech.":[14],"Previous":[15],"multimodal":[16,60],"co-speech":[17,64,121],"gesture":[18,65],"generation":[19],"models":[20],"required":[21],"encoded":[22],"features":[23],"of":[24,93,111],"all":[25,126],"modalities":[26,33,128,136],"generate":[28,43],"gestures.":[29],"If":[30],"some":[31],"input":[32,127,135],"removed":[35],"or":[36,139],"contain":[37,80],"noise,":[38],"the":[39,44,68,71,81,104,109,116,134],"model":[40],"may":[41],"not":[42,123],"properly.":[46],"To":[47],"acquire":[48],"robust":[49],"and":[50,100,113],"generalized":[51],"encodings,":[52],"we":[53,87],"propose":[54],"a":[55,59],"novel":[56],"framework":[57],"pre-trained":[61],"encoder":[62,73],"for":[63],"generation.":[66],"In":[67],"proposed":[69,117],"method,":[70],"multi-head-attention-based":[72],"is":[74,144],"trained":[75],"self-supervised":[77],"learning":[78],"information":[82],"on":[83],"each":[84],"modality.":[85],"Moreover,":[86],"collect":[88],"full-body":[89],"that":[91],"consist":[92],"3D":[94],"joint":[95],"rotations":[96],"improve":[98],"visualization":[99],"apply":[101],"extensible":[105],"body":[106],"model.":[107],"Through":[108],"series":[110],"experiments":[112],"human":[114],"evaluation,":[115],"method":[118],"renders":[119],"realistic":[120],"only":[124],"when":[125,133],"given":[130],"but":[131],"also":[132],"missing":[138],"noisy.":[140],"The":[141],"project":[142],"page":[143],"available":[145],"here":[146],"<sup":[147],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[148],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[149]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
