{"id":"https://openalex.org/W7109976956","doi":"https://doi.org/10.1145/3757377.3763939","title":"Audio Driven Universal Gaussian Head Avatars","display_name":"Audio Driven Universal Gaussian Head Avatars","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W7109976956","doi":"https://doi.org/10.1145/3757377.3763939"},"language":null,"primary_location":{"id":"doi:10.1145/3757377.3763939","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763939","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3757377.3763939","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kartik Teotia","orcid":"https://orcid.org/0009-0007-6985-7159"},"institutions":[{"id":"https://openalex.org/I4210109712","display_name":"Max Planck Institute for Informatics","ror":"https://ror.org/01w19ak89","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210109712"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Kartik Teotia","raw_affiliation_strings":["Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany"],"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I4210109712"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Helge Rhodin","orcid":"https://orcid.org/0000-0003-2692-0801"},"institutions":[{"id":"https://openalex.org/I4210109712","display_name":"Max Planck Institute for Informatics","ror":"https://ror.org/01w19ak89","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210109712"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Helge Rhodin","raw_affiliation_strings":["Max Planck Institute for Informatics, Saarbr\u00fccken, Germany"],"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I4210109712"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mohit Mendiratta","orcid":"https://orcid.org/0009-0001-5577-157X"},"institutions":[{"id":"https://openalex.org/I4210109712","display_name":"Max Planck Institute for Informatics","ror":"https://ror.org/01w19ak89","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210109712"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Mohit Mendiratta","raw_affiliation_strings":["Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany"],"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I4210109712"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hyeongwoo Kim","orcid":"https://orcid.org/0000-0003-0858-0882"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hyeongwoo Kim","raw_affiliation_strings":["Imperial College London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Imperial College London, London, United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Marc Habermann","orcid":"https://orcid.org/0000-0003-3899-7515"},"institutions":[{"id":"https://openalex.org/I4210109712","display_name":"Max Planck Institute for Informatics","ror":"https://ror.org/01w19ak89","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210109712"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Marc Habermann","raw_affiliation_strings":["Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany"],"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I4210109712"]}]},{"author_position":"last","author":{"id":null,"display_name":"Christian Theobalt","orcid":"https://orcid.org/0000-0001-6104-6625"},"institutions":[{"id":"https://openalex.org/I4210109712","display_name":"Max Planck Institute for Informatics","ror":"https://ror.org/01w19ak89","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210109712"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christian Theobalt","raw_affiliation_strings":["Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany"],"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I4210109712"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210109712"],"apc_list":null,"apc_paid":null,"fwci":1.2784,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86727217,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.7271000146865845,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.7271000146865845,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.19120000302791595,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.02449999935925007,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5936999917030334},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.5336999893188477},{"id":"https://openalex.org/keywords/avatar","display_name":"Avatar","score":0.5182999968528748},{"id":"https://openalex.org/keywords/active-appearance-model","display_name":"Active appearance model","score":0.46309998631477356},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.46070000529289246},{"id":"https://openalex.org/keywords/computer-facial-animation","display_name":"Computer facial animation","score":0.43630000948905945},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3846000134944916},{"id":"https://openalex.org/keywords/gaze","display_name":"Gaze","score":0.36719998717308044},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.3621000051498413},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.35269999504089355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.765500009059906},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.609499990940094},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5936999917030334},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5626999735832214},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.5336999893188477},{"id":"https://openalex.org/C2777365542","wikidata":"https://www.wikidata.org/wiki/Q83090","display_name":"Avatar","level":2,"score":0.5182999968528748},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.46309998631477356},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.46070000529289246},{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.43630000948905945},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3846000134944916},{"id":"https://openalex.org/C2779916870","wikidata":"https://www.wikidata.org/wiki/Q14467155","display_name":"Gaze","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.3621000051498413},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.35269999504089355},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3513000011444092},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3483999967575073},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.31619998812675476},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.29100000858306885},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C184297639","wikidata":"https://www.wikidata.org/wiki/Q177765","display_name":"Biometrics","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.2540999948978424},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757377.3763939","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763939","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3757377.3763939","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763939","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2133665775","https://openalex.org/W2237250383","https://openalex.org/W2737658251","https://openalex.org/W2739192055","https://openalex.org/W2760103357","https://openalex.org/W2769666294","https://openalex.org/W2962785568","https://openalex.org/W2963081548","https://openalex.org/W3109585842","https://openalex.org/W3120163087","https://openalex.org/W3154411171","https://openalex.org/W4200630629","https://openalex.org/W4214605256","https://openalex.org/W4286611181","https://openalex.org/W4286611310","https://openalex.org/W4312578889","https://openalex.org/W4312590328","https://openalex.org/W4328028374","https://openalex.org/W4385318467","https://openalex.org/W4386071566","https://openalex.org/W4386076250","https://openalex.org/W4388157164","https://openalex.org/W4390872742","https://openalex.org/W4392285611","https://openalex.org/W4396821417","https://openalex.org/W4400582137","https://openalex.org/W4400818936","https://openalex.org/W4402660084","https://openalex.org/W4402727898","https://openalex.org/W4402753521","https://openalex.org/W4404525371","https://openalex.org/W4404965632","https://openalex.org/W4404985117","https://openalex.org/W4412587746","https://openalex.org/W4413259111","https://openalex.org/W4415799309"],"related_works":[],"abstract_inverted_index":{"We":[0],"introduce":[1],"the":[2,45,80,125,135,184],"first":[3,185],"method":[4,180],"for":[5,119,193],"audio-driven":[6,143,187],"universal":[7,71],"photorealistic":[8],"avatar":[9,188],"synthesis,":[10],"combining":[11],"a":[12,103],"person-agnostic":[13],"speech":[14,72],"model":[15,73,127,189],"with":[16,37,152],"our":[17,33,70,179],"novel":[18],"Universal":[19],"Head":[20],"Avatar":[21],"Prior":[22],"(UHAP).":[23],"UHAP":[24,34,81,147],"is":[25,35,181],"trained":[26],"on":[27,133],"cross-identity":[28],"multi-view":[29],"videos.":[30],"In":[31,51],"particular,":[32],"supervised":[36],"neutral":[38],"scan":[39],"data,":[40],"enabling":[41],"it":[42,123,200],"to":[43,53,61,98,130],"capture":[44],"identity-specific":[46],"details":[47],"at":[48],"high":[49],"fidelity.":[50],"contrast":[52],"previous":[54],"approaches,":[55],"which":[56,106],"predominantly":[57],"map":[58],"audio":[59,77],"features":[60],"geometric":[62,91],"deformations":[63],"only":[64,183],"while":[65],"ignoring":[66],"audio-dependent":[67],"appearance":[68,93,138,170,195],"variations,":[69],"directly":[74],"maps":[75],"raw":[76],"inputs":[78],"into":[79],"latent":[82],"expression":[83,86,112,144],"space.":[84],"This":[85],"space":[87],"inherently":[88],"encodes,":[89],"both,":[90],"and":[92,139,156,166,197,214],"variations.":[94],"For":[95],"efficient":[96],"personalization":[97],"new":[99],"subjects,":[100],"we":[101],"employ":[102],"monocular":[104],"encoder,":[105],"enables":[107,124],"lightweight":[108],"regression":[109],"of":[110],"dynamic":[111],"variations":[113],"across":[114,206],"video":[115],"frames.":[116],"By":[117],"accounting":[118],"these":[120,142],"expression-dependent":[121],"changes,":[122],"subsequent":[126],"fine-tuning":[128],"stage":[129],"focus":[131],"exclusively":[132],"capturing":[134],"subject\u2019s":[136],"global":[137],"geometry.":[140],"Decoding":[141],"codes":[145],"via":[146],"generates":[148],"highly":[149],"realistic":[150,167],"avatars":[151],"precise":[153],"lip":[154],"synchronization":[155],"nuanced":[157],"expressive":[158],"details,":[159],"such":[160],"as":[161,171,173],"eyebrow":[162],"movement,":[163],"gaze":[164],"shifts,":[165],"mouth":[168],"interior":[169],"well":[172],"motion.":[174],"Extensive":[175],"evaluations":[176],"demonstrate":[177],"that":[178,190],"not":[182],"generalizable":[186],"can":[191],"account":[192],"detailed":[194],"modeling":[196],"rendering,":[198],"but":[199],"also":[201],"outperforms":[202],"competing":[203],"(geometry-only)":[204],"methods":[205],"metrics":[207],"measuring":[208],"lip-sync":[209],"accuracy,":[210],"quantitative":[211],"image":[212],"quality,":[213],"perceptual":[215],"realism.":[216]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-08T00:00:00"}
