{"id":"https://openalex.org/W4402093509","doi":"https://doi.org/10.1145/3691341","title":"Speed-Aware Audio-Driven Speech Animation using Adaptive Windows","display_name":"Speed-Aware Audio-Driven Speech Animation using Adaptive Windows","publication_year":2024,"publication_date":"2024-08-31","ids":{"openalex":"https://openalex.org/W4402093509","doi":"https://doi.org/10.1145/3691341"},"language":"en","primary_location":{"id":"doi:10.1145/3691341","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3691341","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3691341","source":{"id":"https://openalex.org/S185367456","display_name":"ACM Transactions on Graphics","issn_l":"0730-0301","issn":["0730-0301","1557-7368"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Graphics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3691341","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030648087","display_name":"Sunjin Jung","orcid":"https://orcid.org/0000-0001-6427-6258"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Sunjin Jung","raw_affiliation_strings":["Visual Media Lab, KAIST, Daejeon, Republic of Korea","Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0001-6427-6258","affiliations":[{"raw_affiliation_string":"Visual Media Lab, KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063397180","display_name":"Yeongho Seol","orcid":"https://orcid.org/0000-0002-7327-2950"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yeongho Seol","raw_affiliation_strings":["NVIDIA, Santa Clara, United States","NVIDIA Corp, Santa Clara, United States"],"raw_orcid":"https://orcid.org/0000-0002-7327-2950","affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, United States","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA Corp, Santa Clara, United States","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054403925","display_name":"Kwanggyoon Seo","orcid":"https://orcid.org/0000-0003-0570-4915"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kwanggyoon Seo","raw_affiliation_strings":["Visual Media Lab, KAIST, Daejeon, Republic of Korea","Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0003-0570-4915","affiliations":[{"raw_affiliation_string":"Visual Media Lab, KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026190908","display_name":"Hyeonseo Na","orcid":"https://orcid.org/0000-0002-6818-1595"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyeonho Na","raw_affiliation_strings":["Visual Media Lab, KAIST, Daejeon, Republic of Korea","Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-6818-1595","affiliations":[{"raw_affiliation_string":"Visual Media Lab, KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101430463","display_name":"Seonghyeon Kim","orcid":"https://orcid.org/0000-0001-8027-8261"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Seonghyeon Kim","raw_affiliation_strings":["Visual Media Lab, KAIST, Daejeon, Republic of Korea and Anigma Technologies, Daejeon, Republic of Korea","Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea","Anigma Technologies, Daejeon Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0001-8027-8261","affiliations":[{"raw_affiliation_string":"Visual Media Lab, KAIST, Daejeon, Republic of Korea and Anigma Technologies, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Anigma Technologies, Daejeon Republic of Korea","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073561308","display_name":"Vanessa Tan","orcid":"https://orcid.org/0009-0001-8174-6909"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Vanessa Tan","raw_affiliation_strings":["Visual Media Lab, KAIST, Daejeon, Republic of Korea","Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0001-8174-6909","affiliations":[{"raw_affiliation_string":"Visual Media Lab, KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026087865","display_name":"Junyong Noh","orcid":"https://orcid.org/0000-0003-1925-3326"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Junyong Noh","raw_affiliation_strings":["Visual Media Lab, KAIST, Daejeon, Republic of Korea","Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0003-1925-3326","affiliations":[{"raw_affiliation_string":"Visual Media Lab, KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Visual Media Lab, Korea Advanced Institute of Science and Technology (KAIST), Daejeon Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5030648087"],"corresponding_institution_ids":["https://openalex.org/I157485424"],"apc_list":null,"apc_paid":null,"fwci":1.3153,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.80448497,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"44","issue":"1","first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7987703680992126},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.6329603791236877},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.5794322490692139},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36902880668640137}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7987703680992126},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.6329603791236877},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.5794322490692139},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36902880668640137}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3691341","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3691341","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3691341","source":{"id":"https://openalex.org/S185367456","display_name":"ACM Transactions on Graphics","issn_l":"0730-0301","issn":["0730-0301","1557-7368"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Graphics","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3691341","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3691341","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3691341","source":{"id":"https://openalex.org/S185367456","display_name":"ACM Transactions on Graphics","issn_l":"0730-0301","issn":["0730-0301","1557-7368"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Graphics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.47999998927116394}],"awards":[],"funders":[{"id":"https://openalex.org/F4320323890","display_name":"Korea Creative Content Agency","ror":"https://ror.org/036vyg793"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402093509.pdf"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W173400130","https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W2029996593","https://openalex.org/W2098396290","https://openalex.org/W2237250383","https://openalex.org/W2286929393","https://openalex.org/W2468212864","https://openalex.org/W2737658251","https://openalex.org/W2738406145","https://openalex.org/W2739192055","https://openalex.org/W2741412112","https://openalex.org/W2804619907","https://openalex.org/W2963009026","https://openalex.org/W2963073614","https://openalex.org/W2981263323","https://openalex.org/W3036601975","https://openalex.org/W3097792222","https://openalex.org/W3106759947","https://openalex.org/W3154411171","https://openalex.org/W3169392543","https://openalex.org/W3174763799","https://openalex.org/W3199414382","https://openalex.org/W4200174933","https://openalex.org/W4200630629","https://openalex.org/W4210657261","https://openalex.org/W4236989128","https://openalex.org/W4237115936","https://openalex.org/W4246270042","https://openalex.org/W4248816236","https://openalex.org/W4297633285","https://openalex.org/W4297841435","https://openalex.org/W4386076250","https://openalex.org/W6781568228","https://openalex.org/W6948150082"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W4310844315","https://openalex.org/W2532377291","https://openalex.org/W2000013817","https://openalex.org/W2390279801","https://openalex.org/W4296190881","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2215755978"],"abstract_inverted_index":{"We":[0,173],"present":[1],"a":[2,12,28,69,74,116,125,182],"novel":[3],"method":[4,34,150,187],"that":[5,26,82,185],"can":[6,151],"generate":[7,152],"realistic":[8,153],"speech":[9,139,154],"animations":[10,155],"of":[11,55,103,107],"3D":[13],"face":[14],"from":[15,64,143,156],"audio":[16,31,38,44,67,85,105,147],"using":[17,68],"multiple":[18,104,145],"adaptive":[19,37,146],"windows.":[20,148],"In":[21],"contrast":[22],"to":[23,47,112,119,128],"previous":[24],"studies":[25],"use":[27,48,102],"fixed":[29],"size":[30,81],"window,":[32],"our":[33,186],"accepts":[35],"an":[36],"window":[39,80,118,127],"as":[40,110,168,170],"input,":[41],"reflecting":[42],"the":[43,59,65,78,84,92,101,113,134,138,144],"speaking":[45,60,94,161],"rate":[46,61],"consistent":[49],"phonemic":[50,131],"information.":[51],"Our":[52,149],"system":[53],"consists":[54],"three":[56],"parts.":[57],"First,":[58],"is":[62,87,141],"estimated":[63,93],"input":[66,111],"neural":[70],"network":[71],"trained":[72],"in":[73,100],"self-supervised":[75],"manner.":[76],"Second,":[77],"appropriate":[79],"encloses":[83],"features":[86],"predicted":[88],"adaptively":[89],"based":[90],"on":[91,121],"rate.":[95],"Another":[96],"key":[97],"element":[98],"lies":[99],"windows":[106],"different":[108],"sizes":[109],"animation":[114,140],"generator:":[115],"small":[117],"concentrate":[120],"detailed":[122],"information":[123,132],"and":[124,178],"large":[126],"consider":[129],"broad":[130],"near":[133],"center":[135],"frame.":[136],"Finally,":[137],"generated":[142],"in-the-wild":[157],"audios":[158],"at":[159],"any":[160],"rate,":[162],"i.e.,":[163],"fast":[164],"raps,":[165],"slow":[166],"songs,":[167],"well":[169],"normal":[171],"speech.":[172],"demonstrate":[174],"via":[175],"extensive":[176],"quantitative":[177],"qualitative":[179],"evaluations":[180],"including":[181],"user":[183],"study":[184],"outperforms":[188],"state-of-the-art":[189],"approaches.":[190]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
