{"id":"https://openalex.org/W7117471270","doi":"https://doi.org/10.1109/tvcg.2025.3649047","title":"Think2Sing: Orchestrating Structured Motion Subtitles for Singing-Driven 3D Head Animation","display_name":"Think2Sing: Orchestrating Structured Motion Subtitles for Singing-Driven 3D Head Animation","publication_year":2025,"publication_date":"2025-12-29","ids":{"openalex":"https://openalex.org/W7117471270","doi":"https://doi.org/10.1109/tvcg.2025.3649047","pmid":"https://pubmed.ncbi.nlm.nih.gov/41460905"},"language":"en","primary_location":{"id":"doi:10.1109/tvcg.2025.3649047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvcg.2025.3649047","pdf_url":null,"source":{"id":"https://openalex.org/S84775595","display_name":"IEEE Transactions on Visualization and Computer Graphics","issn_l":"1077-2626","issn":["1077-2626","1941-0506","2160-9306"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Visualization and Computer Graphics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108993050","display_name":"Zikai Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zikai Huang","raw_affiliation_strings":["School of Computer Science and Engineering, South China University of Technology, Guangdong, China"],"raw_orcid":"https://orcid.org/0009-0005-4526-440X","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, South China University of Technology, Guangdong, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121504821","display_name":"Yihan Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yihan Zhou","raw_affiliation_strings":["School of Computer Science and Engineering, South China University of Technology, Guangdong, China"],"raw_orcid":"https://orcid.org/0009-0000-6869-780X","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, South China University of Technology, Guangdong, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xuemiao Xu","orcid":"https://orcid.org/0000-0002-8006-3663"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuemiao Xu","raw_affiliation_strings":["School of Computer Science and Engineering, South China University of Technology, Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0002-8006-3663","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, South China University of Technology, Guangdong, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101588348","display_name":"Cheng Xu","orcid":"https://orcid.org/0000-0002-4281-6214"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Cheng Xu","raw_affiliation_strings":["Centre for Smart Health, Hong Kong Polytechnic University, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-4281-6214","affiliations":[{"raw_affiliation_string":"Centre for Smart Health, Hong Kong Polytechnic University, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121466565","display_name":"Xiaofen Xing","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofen Xing","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-0016-9055","affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121496959","display_name":"Jing Qin","orcid":null},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jing Qin","raw_affiliation_strings":["Centre for Smart Health, Hong Kong Polytechnic University, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-2961-0860","affiliations":[{"raw_affiliation_string":"Centre for Smart Health, Hong Kong Polytechnic University, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056103024","display_name":"Shengfeng He","orcid":"https://orcid.org/0000-0002-3802-4644"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shengfeng He","raw_affiliation_strings":["School of Computing and Information Systems, Singapore Management University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-3802-4644","affiliations":[{"raw_affiliation_string":"School of Computing and Information Systems, Singapore Management University, Singapore","institution_ids":["https://openalex.org/I79891267"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5108993050"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.62461694,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"32","issue":"3","first_page":"2568","last_page":"2582"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.5077999830245972,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.5077999830245972,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.1477999985218048,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.08129999786615372,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.8122000098228455},{"id":"https://openalex.org/keywords/computer-facial-animation","display_name":"Computer facial animation","score":0.6672999858856201},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5971999764442444},{"id":"https://openalex.org/keywords/computer-animation","display_name":"Computer animation","score":0.5026999711990356},{"id":"https://openalex.org/keywords/motion-capture","display_name":"Motion capture","score":0.4684000015258789},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4345000088214874},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4318000078201294},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.42829999327659607}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8916000127792358},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.8122000098228455},{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.6672999858856201},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5971999764442444},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5203999876976013},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.5026999711990356},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.4684000015258789},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4345000088214874},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4318000078201294},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.42829999327659607},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.42179998755455017},{"id":"https://openalex.org/C98907195","wikidata":"https://www.wikidata.org/wiki/Q5428562","display_name":"Facial motion capture","level":5,"score":0.3944000005722046},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3659999966621399},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34459999203681946},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.34310001134872437},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.30550000071525574},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.28040000796318054},{"id":"https://openalex.org/C44710944","wikidata":"https://www.wikidata.org/wiki/Q1813564","display_name":"Skeletal animation","level":5,"score":0.27239999175071716},{"id":"https://openalex.org/C60851780","wikidata":"https://www.wikidata.org/wiki/Q17056736","display_name":"Interactive skeleton-driven simulation","level":5,"score":0.26179999113082886}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tvcg.2025.3649047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvcg.2025.3649047","pdf_url":null,"source":{"id":"https://openalex.org/S84775595","display_name":"IEEE Transactions on Visualization and Computer Graphics","issn_l":"1077-2626","issn":["1077-2626","1941-0506","2160-9306"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Visualization and Computer Graphics","raw_type":"journal-article"},{"id":"pmid:41460905","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41460905","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on visualization and computer graphics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8005062937736511,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W1588539311","https://openalex.org/W1807142569","https://openalex.org/W1990883837","https://openalex.org/W2002596037","https://openalex.org/W2057922088","https://openalex.org/W2070267188","https://openalex.org/W2468212864","https://openalex.org/W2737658251","https://openalex.org/W2739192055","https://openalex.org/W2763782214","https://openalex.org/W2769666294","https://openalex.org/W2791740949","https://openalex.org/W2804619907","https://openalex.org/W2946287218","https://openalex.org/W2964203186","https://openalex.org/W2981263323","https://openalex.org/W2981921393","https://openalex.org/W2997510589","https://openalex.org/W3081192838","https://openalex.org/W3099284785","https://openalex.org/W3106759947","https://openalex.org/W3153832461","https://openalex.org/W3154411171","https://openalex.org/W3180794345","https://openalex.org/W4200630629","https://openalex.org/W4244497365","https://openalex.org/W4288079574","https://openalex.org/W4297981470","https://openalex.org/W4312590328","https://openalex.org/W4312671789","https://openalex.org/W4380994134","https://openalex.org/W4385801342","https://openalex.org/W4386065807","https://openalex.org/W4386076250","https://openalex.org/W4387967971","https://openalex.org/W4387969119","https://openalex.org/W4388157164","https://openalex.org/W4389212256","https://openalex.org/W4390871712","https://openalex.org/W4390872297","https://openalex.org/W4390872742","https://openalex.org/W4390873123","https://openalex.org/W4390874567","https://openalex.org/W4391305822","https://openalex.org/W4393153464","https://openalex.org/W4394825433","https://openalex.org/W4394896914","https://openalex.org/W4399527576","https://openalex.org/W4400315380","https://openalex.org/W4400573497","https://openalex.org/W4400581045","https://openalex.org/W4402402001","https://openalex.org/W4403780616","https://openalex.org/W4405078945","https://openalex.org/W4407597372","https://openalex.org/W4409262657","https://openalex.org/W4409368359","https://openalex.org/W4409370054","https://openalex.org/W4411112886","https://openalex.org/W4411472278","https://openalex.org/W4413145620","https://openalex.org/W4415366882","https://openalex.org/W6907631384"],"related_works":[],"abstract_inverted_index":{"Singing-driven":[0],"3D":[1,78,158],"head":[2,79,127],"animation":[3,122,128,199],"is":[4,91],"a":[5,61,97,103],"compelling":[6],"yet":[7],"underexplored":[8],"task":[9],"with":[10,107,156],"broad":[11],"applications":[12],"in":[13,182],"virtual":[14],"avatars,":[15],"entertainment,":[16],"and":[17,38,75,85,140,162,169,185,197],"education.":[18],"Existing":[19],"speech-driven":[20],"approaches,":[21],"which":[22],"typically":[23],"map":[24],"audio":[25],"directly":[26],"to":[27,71,88],"motion":[28,95,130,164,171],"through":[29],"implicit":[30],"phoneme-to-viseme":[31],"correspondences,":[32],"often":[33],"yield":[34],"over-smoothed,":[35],"emotionally":[36],"flat,":[37],"semantically":[39,73,167],"inconsistent":[40],"results.":[41],"These":[42,110],"limitations":[43],"render":[44],"them":[45],"inadequate":[46],"for":[47,121],"the":[48,92,151],"unique":[49],"demands":[50],"of":[51,94],"singing-driven":[52],"animation.":[53],"To":[54,145],"address":[55],"this":[56,147],"challenge,":[57],"we":[58,149],"propose":[59],"Think2Sing,":[60],"unified":[62],"diffusion-based":[63],"framework":[64,90,190],"that":[65,116,176],"integrates":[66],"pretrained":[67],"large":[68],"language":[69],"models":[70],"generate":[72],"consistent":[74],"temporally":[76],"coherent":[77],"animations":[80],"conditioned":[81],"on":[82],"both":[83],"lyrics":[84],"acoustics.":[86],"Central":[87],"our":[89,189],"introduction":[93],"subtitles,":[96,165],"structured,":[98],"time-aligned":[99],"representation":[100],"generated":[101],"via":[102],"Singing":[104],"Chain-of-Thought":[105],"process":[106],"acoustic-guided":[108],"retrieval.":[109],"subtitles":[111],"provide":[112],"region-specific":[113],"expressive":[114,143,170],"cues":[115],"serve":[117],"as":[118,129],"interpretable":[119],"priors":[120],"synthesis.":[123,200],"We":[124],"further":[125],"formulate":[126],"intensity":[131],"prediction":[132],"over":[133],"key":[134],"facial":[135],"regions,":[136],"enabling":[137,166,195],"fine-grained":[138],"control":[139],"more":[141],"faithful":[142],"modeling.":[144],"support":[146],"paradigm,":[148],"construct":[150],"first":[152],"multimodal":[153],"singing":[154],"dataset":[155],"synchronized":[157],"motion,":[159],"acoustic":[160],"descriptors,":[161],"aligned":[163],"grounded":[168],"learning.":[172],"Extensive":[173],"experiments":[174],"demonstrate":[175],"Think2Sing":[177],"significantly":[178],"outperforms":[179],"state-of-the-art":[180],"methods":[181],"realism,":[183],"expressiveness,":[184],"emotional":[186],"fidelity.":[187],"Furthermore,":[188],"supports":[191],"flexible":[192],"subtitle-conditioned":[193],"editing,":[194],"precise":[196],"user-controllable":[198]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-29T00:00:00"}
