{"id":"https://openalex.org/W7162538995","doi":"https://doi.org/10.1109/3dv69130.2026.00073","title":"Supervising 3D Talking Head Avatars with Analysis-by-Audio-Synthesis","display_name":"Supervising 3D Talking Head Avatars with Analysis-by-Audio-Synthesis","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W7162538995","doi":"https://doi.org/10.1109/3dv69130.2026.00073"},"language":null,"primary_location":{"id":"doi:10.1109/3dv69130.2026.00073","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00073","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137122356","display_name":"Radek Dan\u010de\u011bcek","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135521","display_name":"Max Planck Institute for Intelligent Systems","ror":"https://ror.org/04fq9j139","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210135521"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Radek Dan\u010de\u011bcek","raw_affiliation_strings":["Max Planck Institute for Intelligent Systems,T&#x0252;bingen,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Intelligent Systems,T&#x0252;bingen,Germany","institution_ids":["https://openalex.org/I4210135521"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036619305","display_name":"Carolin Schmitt","orcid":"https://orcid.org/0000-0001-5361-6683"},"institutions":[{"id":"https://openalex.org/I4210135521","display_name":"Max Planck Institute for Intelligent Systems","ror":"https://ror.org/04fq9j139","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210135521"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Carolin Schmitt","raw_affiliation_strings":["Max Planck Institute for Intelligent Systems,T&#x0252;bingen,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Intelligent Systems,T&#x0252;bingen,Germany","institution_ids":["https://openalex.org/I4210135521"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021042818","display_name":"Senya Polikovsky","orcid":"https://orcid.org/0000-0002-6030-1863"},"institutions":[{"id":"https://openalex.org/I4210135521","display_name":"Max Planck Institute for Intelligent Systems","ror":"https://ror.org/04fq9j139","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210135521"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Senya Polikovsky","raw_affiliation_strings":["Max Planck Institute for Intelligent Systems,T&#x0252;bingen,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Intelligent Systems,T&#x0252;bingen,Germany","institution_ids":["https://openalex.org/I4210135521"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5135225997","display_name":"Michael J. Black","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135521","display_name":"Max Planck Institute for Intelligent Systems","ror":"https://ror.org/04fq9j139","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210135521"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Michael J. Black","raw_affiliation_strings":["Max Planck Institute for Intelligent Systems,T&#x0252;bingen,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Intelligent Systems,T&#x0252;bingen,Germany","institution_ids":["https://openalex.org/I4210135521"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.85411807,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"704","last_page":"716"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.23029999434947968,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.23029999434947968,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.11789999902248383,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.11550000309944153,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/head","display_name":"Head (geology)","score":0.5630000233650208},{"id":"https://openalex.org/keywords/avatar","display_name":"Avatar","score":0.28790000081062317},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.23909999430179596},{"id":"https://openalex.org/keywords/virtual-reality","display_name":"Virtual reality","score":0.2304999977350235}],"concepts":[{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.5630000233650208},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4293000102043152},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3662000000476837},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3465000092983246},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31220000982284546},{"id":"https://openalex.org/C2777365542","wikidata":"https://www.wikidata.org/wiki/Q83090","display_name":"Avatar","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.2777999937534332},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.23909999430179596},{"id":"https://openalex.org/C194969405","wikidata":"https://www.wikidata.org/wiki/Q170519","display_name":"Virtual reality","level":2,"score":0.2304999977350235},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.22869999706745148}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/3dv69130.2026.00073","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00073","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W1566555074","https://openalex.org/W1990883837","https://openalex.org/W2029199293","https://openalex.org/W2064400654","https://openalex.org/W2107813907","https://openalex.org/W2120420721","https://openalex.org/W2141998673","https://openalex.org/W2154961933","https://openalex.org/W2293856338","https://openalex.org/W2468212864","https://openalex.org/W2516001803","https://openalex.org/W2604672468","https://openalex.org/W2737658251","https://openalex.org/W2739192055","https://openalex.org/W2745771616","https://openalex.org/W2769666294","https://openalex.org/W2771328060","https://openalex.org/W2803193013","https://openalex.org/W2804619907","https://openalex.org/W2937579788","https://openalex.org/W2945729334","https://openalex.org/W2964449965","https://openalex.org/W2970131683","https://openalex.org/W2972563022","https://openalex.org/W2981263323","https://openalex.org/W3035626590","https://openalex.org/W3080485275","https://openalex.org/W3095936335","https://openalex.org/W3096650361","https://openalex.org/W3099284785","https://openalex.org/W3154411171","https://openalex.org/W3160305627","https://openalex.org/W3174763799","https://openalex.org/W3180794345","https://openalex.org/W3209059054","https://openalex.org/W3213322812","https://openalex.org/W4200630629","https://openalex.org/W4206204999","https://openalex.org/W4293363567","https://openalex.org/W4311137818","https://openalex.org/W4312326867","https://openalex.org/W4312590328","https://openalex.org/W4312933868","https://openalex.org/W4375868850","https://openalex.org/W4380994134","https://openalex.org/W4385823403","https://openalex.org/W4386065848","https://openalex.org/W4386071707","https://openalex.org/W4386076250","https://openalex.org/W4387421789","https://openalex.org/W4387967971","https://openalex.org/W4388117482","https://openalex.org/W4388157164","https://openalex.org/W4390872449","https://openalex.org/W4390872742","https://openalex.org/W4390872748","https://openalex.org/W4390873123","https://openalex.org/W4393026872","https://openalex.org/W4393149781","https://openalex.org/W4400582137","https://openalex.org/W4400818936","https://openalex.org/W4402112200","https://openalex.org/W4402112242","https://openalex.org/W4402660084","https://openalex.org/W4402704505","https://openalex.org/W4402753806","https://openalex.org/W4402754135","https://openalex.org/W4403878207","https://openalex.org/W4404439875","https://openalex.org/W4409262657","https://openalex.org/W4411528087","https://openalex.org/W4413144700","https://openalex.org/W7160192299","https://openalex.org/W7160196549"],"related_works":[],"abstract_inverted_index":{"In":[0],"order":[1],"to":[2,93,198],"be":[3,91],"widely":[4],"applicable,":[5],"speech-driven":[6],"3D":[7,86,121,143],"head":[8,123,145,226],"avatars":[9,124,227],"must":[10],"articulate":[11],"their":[12],"lips":[13],"in":[14],"accordance":[15],"with":[16,24,50,65,125],"speech,":[17,201],"while":[18,228],"also":[19],"conveying":[20],"the":[21,56,78,95,99,107,184,188,199,219,222],"appropriate":[22],"emotions":[23],"dynamically":[25],"changing":[26],"facial":[27,169,237],"expressions.":[28],"The":[29,102],"key":[30],"problem":[31],"is":[32,196],"that":[33,69,148,165,195,215],"deterministic":[34],"models":[35,45],"produce":[36],"high-quality":[37],"lip-sync":[38,52,223],"but":[39,49],"without":[40],"rich":[41],"expressions,":[42],"whereas":[43],"stochastic":[44,63],"generate":[46],"diverse":[47],"expressions":[48],"lower":[51],"quality.":[53],"To":[54,68,128],"get":[55],"best":[57],"of":[58,221,224,233],"both,":[59],"we":[60,71,132,159,172],"seek":[61],"a":[62,73,82,115,142,150,161,177,193,203],"model":[64,164,175,186],"accurate":[66,126],"lipsync.":[67],"end,":[70],"develop":[72],"new":[74],"approach":[75],"based":[76],"on":[77],"following":[79],"observation:":[80],"if":[81],"method":[83],"generates":[84],"realistic":[85],"lip":[87,100],"motions,":[88],"it":[89],"should":[90,105],"possible":[92],"infer":[94],"spoken":[96],"audio":[97,167],"from":[98,168],"motion.":[101],"inferred":[103],"speech":[104],"match":[106],"original":[108],"input":[109,200],"audio,":[110],"and":[111,191,211],"erroneous":[112],"predictions":[113],"create":[114],"novel":[116,151,162],"supervision":[117,152,206],"signal":[118],"for":[119,231],"training":[120],"talking":[122,144,179,225],"lip-sync.":[127],"demonstrate":[129,214],"this":[130,174],"effect,":[131],"propose":[133],"THUNDER":[134,216],"(Talking":[135],"Heads":[136],"Under":[137],"Neural":[138],"Differentiable":[139],"Elocution":[140],"Reconstruction),":[141],"avatar":[146,180],"framework":[147],"introduces":[149],"mechanism":[153],"via":[154],"differentiable":[155,204],"sound":[156,194],"production.":[157],"First,":[158],"train":[160],"mesh-tospeech":[163],"regresses":[166],"animation.":[170],"Then,":[171],"incorporate":[173],"into":[176],"diffusion-based":[178],"framework.":[181],"During":[182],"training,":[183],"mesh-to-speech":[185],"takes":[187],"generated":[189],"animation":[190],"produces":[192],"compared":[197],"creating":[202],"analysis-by-audio-synthesis":[205],"loop.":[207],"Our":[208],"extensive":[209],"qualitative":[210],"quantitative":[212],"experiments":[213],"significantly":[217],"improves":[218],"quality":[220],"still":[229],"allowing":[230],"generation":[232],"diverse,":[234],"high-quality,":[235],"expressive":[236],"animations.":[238]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-28T00:00:00"}
