{"id":"https://openalex.org/W4402466911","doi":"https://doi.org/10.48550/arxiv.2407.18595","title":"LinguaLinker: Audio-Driven Portraits Animation with Implicit Facial Control Enhancement","display_name":"LinguaLinker: Audio-Driven Portraits Animation with Implicit Facial Control Enhancement","publication_year":2024,"publication_date":"2024-07-26","ids":{"openalex":"https://openalex.org/W4402466911","doi":"https://doi.org/10.48550/arxiv.2407.18595"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2407.18595","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.18595","pdf_url":"https://arxiv.org/pdf/2407.18595","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.18595","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115603637","display_name":"Rui Zhang","orcid":"https://orcid.org/0000-0002-1360-2452"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111324098","display_name":"Yixiao Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Yixiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020247034","display_name":"Zhengnan Lu","orcid":"https://orcid.org/0000-0001-6353-2344"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Zhengnan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088192521","display_name":"Pei Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Pei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109780754","display_name":"Zebiao Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Zebiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020213613","display_name":"Bin Fu","orcid":"https://orcid.org/0000-0003-4590-0216"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Bin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5115603637"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9591000080108643,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9591000080108643,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.9556000232696533,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9067999720573425,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/portrait","display_name":"Portrait","score":0.7167153358459473},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.6946395635604858},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5210251808166504},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4906105101108551},{"id":"https://openalex.org/keywords/computer-facial-animation","display_name":"Computer facial animation","score":0.4802706241607666},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.4304872751235962},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.40038803219795227},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3373781442642212},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.29183730483055115},{"id":"https://openalex.org/keywords/computer-animation","display_name":"Computer animation","score":0.2725561261177063},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.22753557562828064},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20425942540168762},{"id":"https://openalex.org/keywords/visual-arts","display_name":"Visual arts","score":0.161848783493042}],"concepts":[{"id":"https://openalex.org/C162462552","wikidata":"https://www.wikidata.org/wiki/Q134307","display_name":"Portrait","level":2,"score":0.7167153358459473},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.6946395635604858},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5210251808166504},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4906105101108551},{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.4802706241607666},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.4304872751235962},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.40038803219795227},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3373781442642212},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.29183730483055115},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.2725561261177063},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.22753557562828064},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20425942540168762},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.161848783493042}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2407.18595","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.18595","pdf_url":"https://arxiv.org/pdf/2407.18595","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2407.18595","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2407.18595","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.18595","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.18595","pdf_url":"https://arxiv.org/pdf/2407.18595","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402466911.pdf","grobid_xml":"https://content.openalex.org/works/W4402466911.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1544039745","https://openalex.org/W2532377291","https://openalex.org/W2121378366","https://openalex.org/W2999276620","https://openalex.org/W2617644139","https://openalex.org/W4400097232","https://openalex.org/W1976926596","https://openalex.org/W4310844315","https://openalex.org/W3020604125","https://openalex.org/W577858612"],"abstract_inverted_index":{"This":[0],"study":[1],"delves":[2],"into":[3],"the":[4,16,50,65,72,75,82,96,122,127,132],"intricacies":[5],"of":[6,18,81,98,112,124,129],"synchronizing":[7],"facial":[8,32],"dynamics":[9],"with":[10],"multilingual":[11],"audio":[12,60],"inputs,":[13],"focusing":[14],"on":[15],"creation":[17],"visually":[19],"compelling,":[20],"time-synchronized":[21],"animations":[22],"through":[23],"diffusion-based":[24,41],"techniques.":[25],"Diverging":[26],"from":[27],"traditional":[28],"parametric":[29],"models":[30],"for":[31,105,145],"animation,":[33],"our":[34,138],"approach,":[35],"termed":[36],"LinguaLinker,":[37],"adopts":[38],"a":[39,106,142],"holistic":[40],"framework":[42],"that":[43],"integrates":[44],"audio-driven":[45,87],"visual":[46,56,88],"synthesis":[47,89],"to":[48],"enhance":[49],"synergy":[51],"between":[52],"auditory":[53],"stimuli":[54],"and":[55,63,78,101,109,131],"responses.":[57],"We":[58],"process":[59],"features":[61],"separately":[62],"derive":[64],"corresponding":[66],"control":[67,93],"gates,":[68],"which":[69],"implicitly":[70],"govern":[71],"movements":[73],"in":[74,121,149],"mouth,":[76],"eyes,":[77],"head,":[79],"irrespective":[80],"portrait's":[83],"origin.":[84],"The":[85,118],"advanced":[86],"mechanism":[90],"provides":[91],"nuanced":[92],"but":[94],"keeps":[95],"compatibility":[97],"output":[99],"video":[100],"input":[102],"audio,":[103],"allowing":[104],"more":[107],"tailored":[108],"effective":[110],"portrayal":[111],"distinct":[113],"personas":[114],"across":[115],"different":[116],"languages.":[117],"significant":[119],"improvements":[120],"fidelity":[123],"animated":[125],"portraits,":[126],"accuracy":[128],"lip-syncing,":[130],"appropriate":[133],"motion":[134],"variations":[135],"achieved":[136],"by":[137],"method":[139],"render":[140],"it":[141],"versatile":[143],"tool":[144],"animating":[146],"any":[147,150],"portrait":[148],"language.":[151]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2024-09-12T00:00:00"}
