{"id":"https://openalex.org/W7138876377","doi":"https://doi.org/10.48550/arxiv.2603.16936","title":"TDMM-LM: Bridging Facial Understanding and Animation via Language Models","display_name":"TDMM-LM: Bridging Facial Understanding and Animation via Language Models","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138876377","doi":"https://doi.org/10.48550/arxiv.2603.16936"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.16936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.16936","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130154333","display_name":"Luchuan Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Song, Luchuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017467599","display_name":"Pinxin Liu","orcid":"https://orcid.org/0009-0009-6538-7174"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Pinxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129933317","display_name":"Haiyang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Haiyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129809764","display_name":"Zhenchao Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Zhenchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100669165","display_name":"Yunlong Tang","orcid":"https://orcid.org/0000-0003-2796-1787"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Yolo Yunlong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129904007","display_name":"Zichong Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zichong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112553141","display_name":"Susan Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Susan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130213318","display_name":"Jing Bi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bi, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079301873","display_name":"Jason J. Corso","orcid":"https://orcid.org/0000-0001-6454-9594"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Corso, Jason J","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129938010","display_name":"Chenliang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Chenliang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5130154333"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.8483999967575073,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.8483999967575073,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.08579999953508377,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.013199999928474426,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-facial-animation","display_name":"Computer facial animation","score":0.8453999757766724},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.7253999710083008},{"id":"https://openalex.org/keywords/facial-motion-capture","display_name":"Facial motion capture","score":0.5645999908447266},{"id":"https://openalex.org/keywords/computer-animation","display_name":"Computer animation","score":0.49939998984336853},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4796000123023987},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.47859999537467957},{"id":"https://openalex.org/keywords/motion-capture","display_name":"Motion capture","score":0.46480000019073486},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.399399995803833}],"concepts":[{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.8453999757766724},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.7253999710083008},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7038999795913696},{"id":"https://openalex.org/C98907195","wikidata":"https://www.wikidata.org/wiki/Q5428562","display_name":"Facial motion capture","level":5,"score":0.5645999908447266},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.49939998984336853},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4796000123023987},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.47859999537467957},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.46480000019073486},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4609000086784363},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.399399995803833},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.3993000090122223},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.357699990272522},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34869998693466187},{"id":"https://openalex.org/C44710944","wikidata":"https://www.wikidata.org/wiki/Q1813564","display_name":"Skeletal animation","level":5,"score":0.32499998807907104},{"id":"https://openalex.org/C2780905192","wikidata":"https://www.wikidata.org/wiki/Q2341604","display_name":"Facial muscles","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.30660000443458557},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.2948000133037567},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.2653999924659729},{"id":"https://openalex.org/C77660652","wikidata":"https://www.wikidata.org/wiki/Q150971","display_name":"Computer graphics","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2547000050544739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.16936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.16936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7531197667121887,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text-guided":[0],"human":[1],"body":[2],"animation":[3,9,175],"has":[4],"advanced":[5],"rapidly,":[6],"yet":[7],"facial":[8,18,36,52,61,83,96,122,145,174],"lags":[10],"due":[11],"to":[12,29,160],"the":[13,98,114,117,157],"scarcity":[14],"of":[15,35,51,94,120,152],"well-annotated,":[16],"text-paired":[17],"corpora.":[19],"To":[20,150],"close":[21],"this":[22,73,136,155],"gap,":[23],"we":[24,75],"leverage":[25],"foundation":[26],"generative":[27],"models":[28,78,139],"synthesize":[30,144],"a":[31,92,112,165,169],"large,":[32],"balanced":[33],"corpus":[34],"behavior.":[37],"We":[38],"design":[39],"prompts":[40],"suite":[41],"covering":[42],"emotions":[43],"and":[44,57,66,106,108,143,176],"head":[45],"motions,":[46],"generate":[47],"about":[48],"80":[49],"hours":[50],"videos":[53],"with":[54,147],"multiple":[55],"generators,":[56],"fit":[58],"per-frame":[59],"3D":[60,95,121],"parameters,":[62,97],"yielding":[63],"large-scale":[64],"(prompt":[65],"parameter)":[67],"pairs":[68],"for":[69,79,128,172],"training.":[70],"Building":[71],"on":[72],"dataset,":[74],"probe":[76],"language":[77,138,166],"bidirectional":[80],"competence":[81],"over":[82],"motion":[84,126,146,177],"via":[85,124],"two":[86],"complementary":[87],"tasks:":[88],"(1)":[89],"Motion2Language:":[90],"given":[91,111],"sequence":[93,119],"model":[99,115],"produces":[100],"natural-language":[101],"descriptions":[102],"capturing":[103],"content,":[104],"style,":[105],"dynamics;":[107],"(2)":[109],"Language2Motion:":[110],"prompt,":[113],"synthesizes":[116],"corresponding":[118],"parameters":[123],"quantized":[125],"tokens":[127],"downstream":[129],"animation.":[130],"Extensive":[131],"experiments":[132],"show":[133],"that":[134],"in":[135],"setting":[137],"can":[140],"both":[141],"interpret":[142],"strong":[148],"generalization.":[149],"best":[151],"our":[153],"knowledge,":[154],"is":[156],"first":[158],"work":[159],"cast":[161],"facial-parameter":[162],"modeling":[163],"as":[164],"problem,":[167],"establishing":[168],"unified":[170],"path":[171],"text-conditioned":[173],"understanding.":[178]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
