{"id":"https://openalex.org/W2921383185","doi":"https://doi.org/10.23919/apsipa.2018.8659713","title":"Multimodal Speech Driven Facial Shape Animation Using Deep Neural Networks","display_name":"Multimodal Speech Driven Facial Shape Animation Using Deep Neural Networks","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2921383185","doi":"https://doi.org/10.23919/apsipa.2018.8659713","mag":"2921383185"},"language":"en","primary_location":{"id":"doi:10.23919/apsipa.2018.8659713","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659713","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069030230","display_name":"Sasan Asadiabadi","orcid":"https://orcid.org/0000-0001-9774-6105"},"institutions":[{"id":"https://openalex.org/I1351752","display_name":"Ko\u00e7 University","ror":"https://ror.org/00jzwgz36","country_code":"TR","type":"education","lineage":["https://openalex.org/I1351752"]}],"countries":["TR"],"is_corresponding":true,"raw_author_name":"Sasan Asadiabadi","raw_affiliation_strings":["Multimedia, Vision and Graphics Laboratory, Ko\u00e7 University, Istanbul, Turkey"],"affiliations":[{"raw_affiliation_string":"Multimedia, Vision and Graphics Laboratory, Ko\u00e7 University, Istanbul, Turkey","institution_ids":["https://openalex.org/I1351752"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015529766","display_name":"Rizwan Sadiq","orcid":"https://orcid.org/0000-0003-1395-6290"},"institutions":[{"id":"https://openalex.org/I1351752","display_name":"Ko\u00e7 University","ror":"https://ror.org/00jzwgz36","country_code":"TR","type":"education","lineage":["https://openalex.org/I1351752"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Rizwan Sadiq","raw_affiliation_strings":["Multimedia, Vision and Graphics Laboratory, Ko\u00e7 University, Istanbul, Turkey"],"affiliations":[{"raw_affiliation_string":"Multimedia, Vision and Graphics Laboratory, Ko\u00e7 University, Istanbul, Turkey","institution_ids":["https://openalex.org/I1351752"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064993025","display_name":"Engin Erzin","orcid":"https://orcid.org/0000-0002-2715-2368"},"institutions":[{"id":"https://openalex.org/I1351752","display_name":"Ko\u00e7 University","ror":"https://ror.org/00jzwgz36","country_code":"TR","type":"education","lineage":["https://openalex.org/I1351752"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Engin Erzin","raw_affiliation_strings":["Multimedia, Vision and Graphics Laboratory, Ko\u00e7 University, Istanbul, Turkey"],"affiliations":[{"raw_affiliation_string":"Multimedia, Vision and Graphics Laboratory, Ko\u00e7 University, Istanbul, Turkey","institution_ids":["https://openalex.org/I1351752"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5069030230"],"corresponding_institution_ids":["https://openalex.org/I1351752"],"apc_list":null,"apc_paid":null,"fwci":0.7312,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.77647295,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1508","last_page":"1512"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9800999760627747,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8407305479049683},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.7094871401786804},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6585556268692017},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6332948803901672},{"id":"https://openalex.org/keywords/computer-facial-animation","display_name":"Computer facial animation","score":0.5734183192253113},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5479975342750549},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5139879584312439},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4984159469604492},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4424085319042206},{"id":"https://openalex.org/keywords/mean-squared-error","display_name":"Mean squared error","score":0.42384177446365356},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3314719498157501},{"id":"https://openalex.org/keywords/computer-animation","display_name":"Computer animation","score":0.32732847332954407},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.07523095607757568}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8407305479049683},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.7094871401786804},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6585556268692017},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6332948803901672},{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.5734183192253113},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5479975342750549},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5139879584312439},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4984159469604492},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4424085319042206},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.42384177446365356},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3314719498157501},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.32732847332954407},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.07523095607757568},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.23919/apsipa.2018.8659713","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659713","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},{"id":"pmh:oai:cdm21054.contentdm.oclc.org:IR/8562","is_oa":false,"landing_page_url":"https://doi.org/10.23919/APSIPA.2018.8659713","pdf_url":null,"source":{"id":"https://openalex.org/S4306401341","display_name":"Digital Collections portal (Ko\u00e7 University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1351752","host_organization_name":"Ko\u00e7 University","host_organization_lineage":["https://openalex.org/I1351752"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"Conference proceeding"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6299999952316284,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W33507944","https://openalex.org/W92142206","https://openalex.org/W943204654","https://openalex.org/W1522301498","https://openalex.org/W1559788381","https://openalex.org/W1565009026","https://openalex.org/W1635512741","https://openalex.org/W1995562189","https://openalex.org/W2015143272","https://openalex.org/W2062712751","https://openalex.org/W2067031273","https://openalex.org/W2095705004","https://openalex.org/W2102924784","https://openalex.org/W2115252128","https://openalex.org/W2128673549","https://openalex.org/W2156718546","https://openalex.org/W2295661697","https://openalex.org/W2402144811","https://openalex.org/W2737658251","https://openalex.org/W2738406145","https://openalex.org/W2747874407","https://openalex.org/W2953384591","https://openalex.org/W2964121744","https://openalex.org/W6674330103","https://openalex.org/W6677618333","https://openalex.org/W6713134421"],"related_works":["https://openalex.org/W1544039745","https://openalex.org/W2121378366","https://openalex.org/W2999276620","https://openalex.org/W2532377291","https://openalex.org/W1976926596","https://openalex.org/W2989004599","https://openalex.org/W3094080214","https://openalex.org/W2535923857","https://openalex.org/W2156310872","https://openalex.org/W2356609371"],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"we":[3,53],"present":[4],"a":[5,18],"deep":[6],"learning":[7],"multimodal":[8,95,117],"approach":[9,96],"for":[10,32],"speech":[11],"driven":[12],"generation":[13,73],"of":[14,23,27,74],"face":[15],"animations.":[16,34],"Training":[17],"speaker":[19,61,75],"independent":[20,62,76],"model,":[21],"capable":[22],"generating":[24],"different":[25],"emotions":[26],"the":[28,36,50,88,115,131],"speaker,":[29],"is":[30],"crucial":[31],"realistic":[33],"Unlike":[35],"previous":[37],"approaches":[38],"which":[39],"either":[40],"use":[41],"acoustic":[42,80],"features":[43,47],"or":[44],"phoneme":[45],"label":[46],"to":[48,57],"estimate":[49],"facial":[51],"movements,":[52],"utilize":[54],"both":[55],"modalities":[56],"generate":[58],"natural":[59],"looking":[60],"lip":[63],"animations":[64],"synchronized":[65],"with":[66],"affective":[67,85,103],"speech.":[68],"A":[69],"phoneme-based":[70],"model":[71,82,120],"qualifies":[72],"animation,":[77],"whereas":[78],"an":[79],"feature-based":[81],"enables":[83],"capturing":[84],"variation":[86],"during":[87],"animation":[89,119],"generation.":[90],"We":[91,113],"show":[92],"that":[93],"our":[94],"not":[97],"only":[98],"performs":[99],"significantly":[100],"better":[101],"on":[102],"data,":[104],"but":[105],"improves":[106],"performance":[107],"over":[108,136],"neutral":[109],"data":[110],"as":[111],"well.":[112],"evaluate":[114],"proposed":[116],"speech-driven":[118],"using":[121],"two":[122],"large":[123],"scale":[124],"datasets,":[125],"GRID":[126],"and":[127],"SAVEE,":[128],"by":[129],"reporting":[130],"mean":[132],"squared":[133],"error":[134],"(MSE)":[135],"various":[137],"network":[138],"structures.":[139]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":4},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2019-03-22T00:00:00"}
