{"id":"https://openalex.org/W7154354839","doi":"https://doi.org/10.48550/arxiv.2604.10927","title":"LiveGesture Streamable Co-Speech Gesture Generation Model","display_name":"LiveGesture Streamable Co-Speech Gesture Generation Model","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7154354839","doi":"https://doi.org/10.48550/arxiv.2604.10927"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10927","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10927","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10927","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101325356","display_name":"Muhammad Usama Saleem","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saleem, Muhammad Usama","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133597732","display_name":"Mayur Jagdishbhai Patel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patel, Mayur Jagdishbhai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075678858","display_name":"Ekkasit Pinyoanuntapong","orcid":"https://orcid.org/0000-0001-7314-3836"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pinyoanuntapong, Ekkasit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133588970","display_name":"Zhongxing Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Zhongxing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133573364","display_name":"Li Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133618833","display_name":"Hongfei Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xue, Hongfei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048781653","display_name":"Ahmed Helmy","orcid":"https://orcid.org/0000-0003-1878-3536"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Helmy, Ahmed","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133561263","display_name":"Chen Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133564388","display_name":"Pu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Pu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.21400000154972076,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.21400000154972076,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.2102999985218048,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.1266999989748001,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.6660000085830688},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6269000172615051},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6244999766349792},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.5523999929428101},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.40450000762939453},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.3547999858856201},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.32600000500679016}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8154000043869019},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.6660000085830688},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6269000172615051},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6244999766349792},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.5523999929428101},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5134999752044678},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4433000087738037},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.412200003862381},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.40450000762939453},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.3547999858856201},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.32600000500679016},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.32120001316070557},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2824000120162964},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.2743000090122223},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.2651999890804291},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10927","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10927","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10927","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10927","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5011624693870544,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"propose":[1],"LiveGesture,":[2],"the":[3,51,65,73,82,180,191],"first":[4],"fully":[5],"streamable,":[6],"speech-driven":[7],"full-body":[8,202],"gesture":[9,25],"generation":[10,32],"framework":[11],"that":[12,195],"operates":[13],"with":[14],"zero":[15,215],"look-ahead":[16,216],"and":[17,33,72,128,137,161,174,200],"supports":[18],"arbitrary":[19],"sequence":[20,84],"length.":[21],"Unlike":[22],"existing":[23],"co-speech":[24],"methods,":[26],"which":[27,169],"are":[28,140],"designed":[29],"for":[30,54,115],"offline":[31,211],"either":[34],"treat":[35],"body":[36,87,117],"regions":[37],"independently":[38],"or":[39,208],"entangle":[40],"all":[41],"joints":[42],"within":[43],"a":[44,150],"single":[45],"model,":[46],"LiveGesture":[47,59,196],"is":[48],"built":[49],"from":[50],"ground":[52],"up":[53],"causal,":[55,90],"region-coordinated":[56],"motion":[57,83,92,113,131],"generation.":[58],"consists":[60],"of":[61,85,101],"two":[62],"main":[63],"modules:":[64],"Streamable":[66],"Vector":[67],"Quantized":[68],"Motion":[69],"Tokenizer":[70],"(SVQ)":[71],"Hierarchical":[74],"Autoregressive":[75],"Transformer":[76],"(HAR).":[77],"The":[78],"SVQ":[79],"tokenizer":[80],"converts":[81],"each":[86,116],"region":[88,176],"into":[89],"discrete":[91],"tokens,":[93],"enabling":[94],"real-time,":[95],"streamable":[96,151],"token":[97,172],"decoding.":[98],"On":[99],"top":[100],"SVQ,":[102],"HAR":[103],"employs":[104],"region-expert":[105],"autoregressive":[106,166],"(xAR)":[107],"transformers":[108],"to":[109,178,182],"model":[110,181],"expressive,":[111],"fine-grained":[112],"dynamics":[114,132],"region.":[118],"A":[119],"causal":[120,152],"spatio-temporal":[121],"fusion":[122],"module":[123],"(xAR":[124],"Fusion)":[125],"then":[126],"captures":[127],"integrates":[129],"correlated":[130],"across":[133],"regions.":[134],"Both":[135],"xAR":[136,138],"Fusion":[139],"conditioned":[141],"on":[142,190],"live,":[143],"continuously":[144],"arriving":[145],"audio":[146,153],"signals":[147],"encoded":[148],"by":[149],"encoder.":[154],"To":[155],"enhance":[156],"robustness":[157],"under":[158,213],"streaming":[159],"noise":[160],"prediction":[162],"errors,":[163],"we":[164],"introduce":[165],"masking":[167,173,177],"training,":[168],"leverages":[170],"uncertainty-guided":[171],"random":[175],"expose":[179],"imperfect,":[183],"partially":[184],"erroneous":[185],"histories":[186],"during":[187],"training.":[188],"Experiments":[189],"BEAT2":[192],"dataset":[193],"demonstrate":[194],"produces":[197],"coherent,":[198],"diverse,":[199],"beat-synchronous":[201],"gestures":[203],"in":[204],"real":[205],"time,":[206],"matching":[207],"surpassing":[209],"state-of-the-art":[210],"methods":[212],"true":[214],"conditions.":[217]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-15T00:00:00"}
