{"id":"https://openalex.org/W4387421716","doi":"https://doi.org/10.1145/3577190.3614135","title":"AQ-GT: a Temporally Aligned and Quantized GRU-Transformer for Co-Speech Gesture Synthesis","display_name":"AQ-GT: a Temporally Aligned and Quantized GRU-Transformer for Co-Speech Gesture Synthesis","publication_year":2023,"publication_date":"2023-10-07","ids":{"openalex":"https://openalex.org/W4387421716","doi":"https://doi.org/10.1145/3577190.3614135"},"language":"en","primary_location":{"id":"doi:10.1145/3577190.3614135","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3577190.3614135","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3577190.3614135","source":{"id":"https://openalex.org/S4363608440","display_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3577190.3614135","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081855415","display_name":"Hendric Vo\u00df","orcid":"https://orcid.org/0009-0003-3646-7702"},"institutions":[{"id":"https://openalex.org/I20121455","display_name":"Bielefeld University","ror":"https://ror.org/02hpadn98","country_code":"DE","type":"education","lineage":["https://openalex.org/I20121455"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Hendric Vo\u00df","raw_affiliation_strings":["Social Cognitive Systems - Cluster of Excellence Cognitive Interaction Technology (CITEC), Bielefeld University, Germany"],"raw_orcid":"https://orcid.org/0009-0003-3646-7702","affiliations":[{"raw_affiliation_string":"Social Cognitive Systems - Cluster of Excellence Cognitive Interaction Technology (CITEC), Bielefeld University, Germany","institution_ids":["https://openalex.org/I20121455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043486945","display_name":"Stefan Kopp","orcid":"https://orcid.org/0000-0002-4047-9277"},"institutions":[{"id":"https://openalex.org/I20121455","display_name":"Bielefeld University","ror":"https://ror.org/02hpadn98","country_code":"DE","type":"education","lineage":["https://openalex.org/I20121455"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Kopp","raw_affiliation_strings":["Cluster of Excellence Cognitive Interaction Technology (CITEC), Bielefeld University, Germany"],"raw_orcid":"https://orcid.org/0000-0002-4047-9277","affiliations":[{"raw_affiliation_string":"Cluster of Excellence Cognitive Interaction Technology (CITEC), Bielefeld University, Germany","institution_ids":["https://openalex.org/I20121455"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5081855415"],"corresponding_institution_ids":["https://openalex.org/I20121455"],"apc_list":null,"apc_paid":null,"fwci":2.5628,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.91071429,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"60","last_page":"69"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.8864219784736633},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7727261781692505},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.565185546875},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.444087952375412},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.43825405836105347},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.43774276971817017},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.43062078952789307},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35457077622413635},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2693265676498413}],"concepts":[{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.8864219784736633},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7727261781692505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.565185546875},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.444087952375412},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.43825405836105347},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.43774276971817017},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.43062078952789307},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35457077622413635},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2693265676498413},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3577190.3614135","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3577190.3614135","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3577190.3614135","source":{"id":"https://openalex.org/S4363608440","display_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3577190.3614135","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3577190.3614135","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3577190.3614135","source":{"id":"https://openalex.org/S4363608440","display_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387421716.pdf","grobid_xml":"https://content.openalex.org/works/W4387421716.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W1208039178","https://openalex.org/W1991366868","https://openalex.org/W2007337857","https://openalex.org/W2046033161","https://openalex.org/W2293741035","https://openalex.org/W2294797155","https://openalex.org/W2611706523","https://openalex.org/W2792512374","https://openalex.org/W2802061317","https://openalex.org/W2871950322","https://openalex.org/W2896457183","https://openalex.org/W2901285216","https://openalex.org/W2903477993","https://openalex.org/W2945629925","https://openalex.org/W2962795401","https://openalex.org/W2962896489","https://openalex.org/W2964062189","https://openalex.org/W2982625143","https://openalex.org/W2986302981","https://openalex.org/W2991575677","https://openalex.org/W3036601975","https://openalex.org/W3048550213","https://openalex.org/W3083173864","https://openalex.org/W3096831136","https://openalex.org/W3098994456","https://openalex.org/W3107914916","https://openalex.org/W3115266783","https://openalex.org/W3125775899","https://openalex.org/W3180355996","https://openalex.org/W4221142137","https://openalex.org/W4281698221","https://openalex.org/W4292945985","https://openalex.org/W4296405085","https://openalex.org/W4303448003","https://openalex.org/W4308222673","https://openalex.org/W4312719027","https://openalex.org/W4312799843","https://openalex.org/W4321351659","https://openalex.org/W4364377334"],"related_works":["https://openalex.org/W2066003895","https://openalex.org/W2136763963","https://openalex.org/W2109705048","https://openalex.org/W2940588515","https://openalex.org/W1909151225","https://openalex.org/W1987783679","https://openalex.org/W2160030256","https://openalex.org/W1521297879","https://openalex.org/W4253235840","https://openalex.org/W3151937861"],"abstract_inverted_index":{"The":[0,67,168],"generation":[1,85,113,133,199],"of":[2,19,88,94,114,155,178],"realistic":[3,116],"and":[4,35,75,86,117,125,185,197],"contextually":[5],"relevant":[6],"co-speech":[7,32,147],"gestures":[8,45,119,148],"is":[9,186],"a":[10,28,59,64,95,106,182],"challenging":[11],"yet":[12],"increasingly":[13],"important":[14],"task":[15],"in":[16,77,131],"the":[17,81,84,92,112,132,175,179,198],"creation":[18],"multimodal":[20],"artificial":[21],"agents.":[22],"Prior":[23],"methods":[24,144],"focused":[25],"on":[26],"learning":[27,91],"direct":[29],"correspondence":[30],"between":[31],"gesture":[33,56],"representations":[34],"produced":[36],"motions,":[37],"which":[38],"created":[39],"seemingly":[40],"natural":[41],"but":[42],"often":[43],"unconvincing":[44],"during":[46],"human":[47,123,156,190],"assessment.":[48],"We":[49,135,158,192],"present":[50],"an":[51,161],"approach":[52,138,173],"to":[53,101,105,164],"pre-train":[54],"partial":[55],"sequences":[57],"using":[58],"generative":[60],"adversarial":[61],"network":[62],"with":[63,142,152],"quantization":[65],"pipeline.":[66],"resulting":[68],"codebook":[69],"vectors":[70],"serve":[71],"as":[72,99,149,151],"both":[73],"input":[74],"output":[76],"our":[78,137,166,172,194],"framework,":[79],"forming":[80],"basis":[82],"for":[83,145],"reconstruction":[87],"gestures.":[89],"By":[90],"mapping":[93,103],"latent":[96],"space":[97],"representation":[98],"opposed":[100],"directly":[102],"it":[104,141],"vector":[107],"representation,":[108],"this":[109],"framework":[110,200],"facilitates":[111],"highly":[115],"expressive":[118],"that":[120,171],"closely":[121],"replicate":[122],"movement":[124],"behavior,":[126],"while":[127],"simultaneously":[128],"avoiding":[129],"artifacts":[130],"process.":[134],"evaluate":[136],"by":[139,181],"comparing":[140],"established":[143],"generating":[146],"well":[150],"existing":[153],"datasets":[154],"behavior.":[157],"also":[159],"perform":[160],"ablation":[162],"study":[163],"assess":[165],"findings.":[167],"results":[169],"show":[170],"outperforms":[174],"current":[176],"state":[177],"art":[180],"clear":[183],"margin":[184],"partially":[187],"indistinguishable":[188],"from":[189],"gesturing.":[191],"make":[193],"data":[195],"pipeline":[196],"publicly":[201],"available.":[202]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
