{"id":"https://openalex.org/W4406322703","doi":"https://doi.org/10.1109/tvcg.2025.3529611","title":"SpeechAct: Towards Generating Whole-Body Motion From Speech","display_name":"SpeechAct: Towards Generating Whole-Body Motion From Speech","publication_year":2025,"publication_date":"2025-01-13","ids":{"openalex":"https://openalex.org/W4406322703","doi":"https://doi.org/10.1109/tvcg.2025.3529611","pmid":"https://pubmed.ncbi.nlm.nih.gov/40030891"},"language":"en","primary_location":{"id":"doi:10.1109/tvcg.2025.3529611","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvcg.2025.3529611","pdf_url":null,"source":{"id":"https://openalex.org/S84775595","display_name":"IEEE Transactions on Visualization and Computer Graphics","issn_l":"1077-2626","issn":["1077-2626","1941-0506","2160-9306"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Visualization and Computer Graphics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022160293","display_name":"Jinsong Zhang","orcid":"https://orcid.org/0000-0001-9619-5030"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jinsong Zhang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100569694","display_name":"Minjie Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minjie Zhu","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103102493","display_name":"Yuxiang Zhang","orcid":"https://orcid.org/0000-0002-8807-0825"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxiang Zhang","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074081252","display_name":"Zerong Zheng","orcid":"https://orcid.org/0000-0003-1339-2480"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zerong Zheng","raw_affiliation_strings":["NNKosmos Technology, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"NNKosmos Technology, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032875389","display_name":"Yebin Liu","orcid":"https://orcid.org/0000-0003-3215-0225"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yebin Liu","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100377578","display_name":"Kun Li","orcid":"https://orcid.org/0000-0003-2326-0166"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Li","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5022160293"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":2.3841,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.85853854,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"31","issue":"10","first_page":"6737","last_page":"6750"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9401000142097473,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9401000142097473,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9284999966621399,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9143000245094299,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8108972311019897},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5777168869972229},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5437662601470947},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.44026362895965576},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3829762935638428},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3665800094604492},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3227923512458801}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8108972311019897},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5777168869972229},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5437662601470947},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.44026362895965576},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3829762935638428},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3665800094604492},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3227923512458801}],"mesh":[{"descriptor_ui":"D000076142","descriptor_name":"Virtual Reality","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000076142","descriptor_name":"Virtual Reality","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003196","descriptor_name":"Computer Graphics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003196","descriptor_name":"Computer Graphics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009038","descriptor_name":"Motion","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009038","descriptor_name":"Motion","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009068","descriptor_name":"Movement","qualifier_ui":"Q000502","qualifier_name":"physiology","is_major_topic":false},{"descriptor_ui":"D009068","descriptor_name":"Movement","qualifier_ui":"Q000502","qualifier_name":"physiology","is_major_topic":false},{"descriptor_ui":"D013060","descriptor_name":"Speech","qualifier_ui":"Q000502","qualifier_name":"physiology","is_major_topic":true},{"descriptor_ui":"D013060","descriptor_name":"Speech","qualifier_ui":"Q000502","qualifier_name":"physiology","is_major_topic":true},{"descriptor_ui":"D021621","descriptor_name":"Imaging, Three-Dimensional","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D021621","descriptor_name":"Imaging, Three-Dimensional","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D051598","descriptor_name":"Whole Body Imaging","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D051598","descriptor_name":"Whole Body Imaging","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1109/tvcg.2025.3529611","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvcg.2025.3529611","pdf_url":null,"source":{"id":"https://openalex.org/S84775595","display_name":"IEEE Transactions on Visualization and Computer Graphics","issn_l":"1077-2626","issn":["1077-2626","1941-0506","2160-9306"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Visualization and Computer Graphics","raw_type":"journal-article"},{"id":"pmid:40030891","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40030891","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on visualization and computer graphics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W2011611983","https://openalex.org/W2027454453","https://openalex.org/W2235920218","https://openalex.org/W2296371640","https://openalex.org/W2612744935","https://openalex.org/W2883221003","https://openalex.org/W2922298118","https://openalex.org/W2962795401","https://openalex.org/W2963907666","https://openalex.org/W2967443589","https://openalex.org/W2971625077","https://openalex.org/W2978956737","https://openalex.org/W2981263323","https://openalex.org/W3002310794","https://openalex.org/W3009042479","https://openalex.org/W3083173864","https://openalex.org/W3097823560","https://openalex.org/W3098994456","https://openalex.org/W3108316907","https://openalex.org/W3109597949","https://openalex.org/W3115266783","https://openalex.org/W3167478287","https://openalex.org/W3171007011","https://openalex.org/W3188493377","https://openalex.org/W3194872882","https://openalex.org/W3198131199","https://openalex.org/W3206084531","https://openalex.org/W3209984917","https://openalex.org/W3216650889","https://openalex.org/W4200630629","https://openalex.org/W4214677627","https://openalex.org/W4230429791","https://openalex.org/W4303448003","https://openalex.org/W4312309978","https://openalex.org/W4312437946","https://openalex.org/W4312635677","https://openalex.org/W4312671789","https://openalex.org/W4312674262","https://openalex.org/W4312710790","https://openalex.org/W4313072274","https://openalex.org/W4316661068","https://openalex.org/W4322631681","https://openalex.org/W4362496432","https://openalex.org/W4377001520","https://openalex.org/W4377010269","https://openalex.org/W4385263018","https://openalex.org/W4385275452","https://openalex.org/W4385284180","https://openalex.org/W4385764101","https://openalex.org/W4386075984","https://openalex.org/W4386076154","https://openalex.org/W4386114518","https://openalex.org/W4388157164","https://openalex.org/W4388286240","https://openalex.org/W4388521736","https://openalex.org/W4390874263","https://openalex.org/W4400582137","https://openalex.org/W4400818936","https://openalex.org/W4402716181","https://openalex.org/W4402727140","https://openalex.org/W4404439890"],"related_works":["https://openalex.org/W2068608913","https://openalex.org/W3124914020","https://openalex.org/W2141033859","https://openalex.org/W2156434174","https://openalex.org/W2071701083","https://openalex.org/W2383687187","https://openalex.org/W2081517010","https://openalex.org/W2121496884","https://openalex.org/W2387910809","https://openalex.org/W2294221496"],"abstract_inverted_index":{"Whole-body":[0],"motion":[1,44,52,83,95,105,118,124,135,171,195],"generation":[2,84],"from":[3,24,85,106,139,152,173],"speech":[4,86,207],"audio":[5,109,154],"is":[6,72,220],"crucial":[7],"for":[8],"computer":[9],"graphics":[10],"and":[11,20,42,49,64,76,81,101,145,156,205],"immersive":[12],"VR/AR.":[13],"Prior":[14],"methods":[15],"struggle":[16],"to":[17,46,74,78,92,128,179,191,197],"produce":[18,180],"natural":[19,82],"diverse":[21],"whole-body":[22],"motions":[23,141],"speech.":[25],"In":[26,184],"this":[27],"paper,":[28],"we":[29,120,167,186],"introduce":[30],"a":[31,38,90,94,111,122,176,188],"novel":[32],"method,":[33],"named":[34],"SpeechAct,":[35],"based":[36],"on":[37],"hybrid":[39,55,99],"point":[40,56],"representation":[41,57,63],"contrastive":[43,123,177],"learning":[45,125],"boost":[47,115],"realism":[48],"diversity":[50,116],"in":[51,117],"generation.":[53],"Our":[54],"leverages":[58],"the":[59,104,107,129,133,140,169,198,202,206,212],"advantages":[60],"of":[61,67,142,215],"keypoint":[62],"surface":[65],"points":[66],"3D":[68],"body":[69],"model,":[70],"which":[71],"easy":[73],"learn":[75,93],"helps":[77],"achieve":[79],"smooth":[80],"audio.":[87,208],"We":[88,148],"design":[89],"VQ-VAE":[91],"codebook":[96],"using":[97,110,159,175],"our":[98,160,216],"presentation,":[100],"then":[102],"regress":[103],"input":[108],"translation":[112,161],"model.":[113,162,217],"To":[114],"generation,":[119],"propose":[121],"method":[126],"according":[127],"intuitive":[130],"idea":[131],"that":[132],"generated":[134],"should":[136],"be":[137],"different":[138],"other":[143,146,153,157],"audios":[144],"speakers.":[147],"collect":[149],"negative":[150,165],"samples":[151],"inputs":[155],"speakers":[158],"With":[163],"these":[164],"samples,":[166],"pull":[168],"current":[170],"away":[172],"them":[174],"loss":[178],"more":[181],"distinctive":[182],"representations.":[183],"addition,":[185],"compose":[187],"face":[189,194,203],"generator":[190],"generate":[192],"deterministic":[193],"due":[196],"strong":[199],"connection":[200],"between":[201],"movements":[204],"Experimental":[209],"results":[210],"validate":[211],"superior":[213],"performance":[214],"The":[218],"code":[219],"available":[221],"at":[222],"http://cic.tju.edu.cn/faculty/likun/projects/SpeechAct/index.html.":[223]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
