{"id":"https://openalex.org/W4408355595","doi":"https://doi.org/10.1109/icassp49660.2025.10889031","title":"Multi-Modal Rhythmic Generative Model for Chinese Cued Speech Gestures Generation","display_name":"Multi-Modal Rhythmic Generative Model for Chinese Cued Speech Gestures Generation","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355595","doi":"https://doi.org/10.1109/icassp49660.2025.10889031"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889031","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889031","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100418783","display_name":"Li Liu","orcid":"https://orcid.org/0000-0002-2011-2873"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li Liu","raw_affiliation_strings":["HKUST(GZ),Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"HKUST(GZ),Guangzhou,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045616176","display_name":"Wentao Lei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wentao Lei","raw_affiliation_strings":["HKUST(GZ),Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"HKUST(GZ),Guangzhou,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100676721","display_name":"Wenwu Wang","orcid":"https://orcid.org/0000-0002-8393-5703"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wenwu Wang","raw_affiliation_strings":["University of Surrey,Guildford,England"],"affiliations":[{"raw_affiliation_string":"University of Surrey,Guildford,England","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100418783"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05721695,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9545000195503235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.927299976348877,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cued-speech","display_name":"Cued speech","score":0.7222849130630493},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7081061601638794},{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.6566528677940369},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6506352424621582},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5961340069770813},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5940585136413574},{"id":"https://openalex.org/keywords/rhythm","display_name":"Rhythm","score":0.5328695178031921},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.4953969717025757},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.432186484336853},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3240665793418884},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.16262143850326538},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1321888566017151},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.06591132283210754},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0471150279045105}],"concepts":[{"id":"https://openalex.org/C83195618","wikidata":"https://www.wikidata.org/wiki/Q590951","display_name":"Cued speech","level":2,"score":0.7222849130630493},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7081061601638794},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.6566528677940369},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6506352424621582},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5961340069770813},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5940585136413574},{"id":"https://openalex.org/C135343436","wikidata":"https://www.wikidata.org/wiki/Q170406","display_name":"Rhythm","level":2,"score":0.5328695178031921},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.4953969717025757},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.432186484336853},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3240665793418884},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.16262143850326538},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1321888566017151},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.06591132283210754},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0471150279045105},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889031","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889031","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W430830562","https://openalex.org/W1972680065","https://openalex.org/W2013640163","https://openalex.org/W2028778923","https://openalex.org/W2062826588","https://openalex.org/W2115410550","https://openalex.org/W2122399640","https://openalex.org/W2157331557","https://openalex.org/W2493916176","https://openalex.org/W2799813293","https://openalex.org/W2888888638","https://openalex.org/W2901285216","https://openalex.org/W2963744813","https://openalex.org/W2972620384","https://openalex.org/W2998687373","https://openalex.org/W3002310794","https://openalex.org/W3008402854","https://openalex.org/W3009042479","https://openalex.org/W3083173864","https://openalex.org/W3113399631","https://openalex.org/W3188493377","https://openalex.org/W3194872882","https://openalex.org/W4294754026","https://openalex.org/W4303448003","https://openalex.org/W4375868832","https://openalex.org/W4386076458","https://openalex.org/W4391759831","https://openalex.org/W4401024305","https://openalex.org/W6606634433","https://openalex.org/W6628822853","https://openalex.org/W6631104366","https://openalex.org/W6682537320","https://openalex.org/W6749825310","https://openalex.org/W6779823529","https://openalex.org/W6791353385","https://openalex.org/W6856783564","https://openalex.org/W6870040498"],"related_works":["https://openalex.org/W2791807133","https://openalex.org/W2537963312","https://openalex.org/W2066003895","https://openalex.org/W2537762514","https://openalex.org/W2349788282","https://openalex.org/W577271088","https://openalex.org/W2120801881","https://openalex.org/W1982853263","https://openalex.org/W1974473538","https://openalex.org/W2020010749"],"abstract_inverted_index":{"Cued":[0],"speech":[1],"(CS)":[2],"is":[3,80],"a":[4,83,102,111,145,186],"novel":[5],"visual":[6],"coding":[7,84],"system,":[8],"which":[9,79,109],"combines":[10],"lip":[11,35,167],"reading":[12],"with":[13,190,207],"several":[14],"specific":[15],"hand":[16,37,125,128,164],"codings":[17],"to":[18,22,63,117,137,152],"help":[19],"hearing-impaired":[20],"people":[21],"communicate":[23],"effectively.":[24],"This":[25],"work":[26,42],"focuses":[27],"on":[28,174],"the":[29,48,71,96,139,154,158,198],"audio/text-driven":[30],"CS":[31,50,76,105,121,163,181],"gestures":[32,38,122,165],"(i.e.,":[33,123],"continuous":[34],"and":[36,127,131,166,185,194,205],"movements)":[39],"generation.":[40,51],"Previous":[41],"used":[43],"template-based":[44],"statistical":[45],"methods":[46,54],"for":[47,82],"French":[49],"However,":[52],"these":[53],"are":[55,172],"fragile":[56],"since":[57],"they":[58],"need":[59],"careful":[60],"hand-crafted":[61],"pre-processing":[62],"fit":[64],"models,":[65],"resulting":[66],"in":[67,74,91],"poor":[68],"robustness.":[69],"Furthermore,":[70],"natural":[72],"rhythm":[73,134,140,155],"generated":[75],"gesture":[77,106],"sequences,":[78],"essential":[81],"system":[85],"of":[86,160,177,200],"spoken":[87],"languages,":[88],"was":[89],"overlooked":[90],"prior":[92],"studies.":[93],"To":[94],"solve":[95],"above-mentioned":[97],"problems,":[98],"we":[99,143],"innovatively":[100],"propose":[101],"two-branched":[103],"rhythmic":[104],"generation":[107],"framework,":[108],"contains":[110],"multi-modal":[112,120,208],"adversarial":[113],"semantic":[114],"generator":[115,135],"(MASG)":[116],"generate":[118],"accurate":[119],"lip,":[124],"shape":[126],"position":[129],"movements),":[130],"an":[132],"audio-driven":[133],"(ARG)":[136],"extract":[138],"information.":[141],"Moreover,":[142],"design":[144],"new":[146],"Gesture":[147],"Audio":[148],"Difference":[149],"(GAD)":[150],"metric":[151],"evaluate":[153],"coherence":[156],"considering":[157],"issue":[159],"asynchrony":[161],"between":[162],"movements.":[168],"Extensive":[169],"experimental":[170],"results":[171],"presented":[173],"two":[175,178],"datasets":[176],"tasks":[179],"(a":[180],"dataset":[182,206],"named":[183],"MCCS-2024":[184],"co-speech":[187],"TED":[188],"dataset)":[189],"comprehensive":[191],"ablation":[192],"analysis":[193],"user":[195],"study,":[196],"demonstrating":[197],"effectiveness":[199],"our":[201],"method.":[202],"The":[203],"code":[204],"annotations":[209],"were":[210],"made":[211],"public":[212],"at":[213],"https://mccs-2024.github.io/.":[214]},"counts_by_year":[],"updated_date":"2026-02-26T08:16:20.718346","created_date":"2025-10-10T00:00:00"}
