{"id":"https://openalex.org/W2945998549","doi":"https://doi.org/10.1145/3340555.3353745","title":"Speaker-Independent Speech-Driven Visual Speech Synthesis using Domain-Adapted Acoustic Models","display_name":"Speaker-Independent Speech-Driven Visual Speech Synthesis using Domain-Adapted Acoustic Models","publication_year":2019,"publication_date":"2019-10-14","ids":{"openalex":"https://openalex.org/W2945998549","doi":"https://doi.org/10.1145/3340555.3353745","mag":"2945998549"},"language":"en","primary_location":{"id":"doi:10.1145/3340555.3353745","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3340555.3353745","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Conference on Multimodal Interaction","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1905.06860","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101807751","display_name":"Ahmed Hussen Abdelaziz","orcid":"https://orcid.org/0000-0001-8027-4666"},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ahmed Hussen Abdelaziz","raw_affiliation_strings":["Apple Inc., Cupertino, CA","[Apple Inc., Cupertino, CA]"],"affiliations":[{"raw_affiliation_string":"Apple Inc., Cupertino, CA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"[Apple Inc., Cupertino, CA]","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112911728","display_name":"Barry-John Theobald","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Barry-John Theobald","raw_affiliation_strings":["Apple Inc., Cupertino, CA","[Apple Inc., Cupertino, CA]"],"affiliations":[{"raw_affiliation_string":"Apple Inc., Cupertino, CA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"[Apple Inc., Cupertino, CA]","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103014977","display_name":"Justin Binder","orcid":"https://orcid.org/0000-0003-0294-6601"},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Justin Binder","raw_affiliation_strings":["Apple Inc., Cupertino, CA","[Apple Inc., Cupertino, CA]"],"affiliations":[{"raw_affiliation_string":"Apple Inc., Cupertino, CA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"[Apple Inc., Cupertino, CA]","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069340484","display_name":"Gabriele Fanelli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gabriele Fanelli","raw_affiliation_strings":["Apple Inc., Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Apple Inc., Zurich, Switzerland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049896100","display_name":"Paul R. Dixon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paul Dixon","raw_affiliation_strings":["Apple Inc., Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Apple Inc., Zurich, Switzerland","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nick Apostoloff","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nick Apostoloff","raw_affiliation_strings":["Apple Inc., Cupertino, CA","[Apple Inc., Cupertino, CA]"],"affiliations":[{"raw_affiliation_string":"Apple Inc., Cupertino, CA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"[Apple Inc., Cupertino, CA]","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012536727","display_name":"Thibaut Weise","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thibaut Weise","raw_affiliation_strings":["Apple Inc., Cupertino, CA","[Apple Inc., Cupertino, CA]"],"affiliations":[{"raw_affiliation_string":"Apple Inc., Cupertino, CA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"[Apple Inc., Cupertino, CA]","institution_ids":["https://openalex.org/I4210153776"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091124471","display_name":"Sachin Kajareker","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153776","display_name":"Apple (United States)","ror":"https://ror.org/059hsda18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153776"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sachin Kajareker","raw_affiliation_strings":["Apple Inc., Cupertino, CA","[Apple Inc., Cupertino, CA]"],"affiliations":[{"raw_affiliation_string":"Apple Inc., Cupertino, CA","institution_ids":["https://openalex.org/I4210153776"]},{"raw_affiliation_string":"[Apple Inc., Cupertino, CA]","institution_ids":["https://openalex.org/I4210153776"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101807751"],"corresponding_institution_ids":["https://openalex.org/I4210153776"],"apc_list":null,"apc_paid":null,"fwci":0.167,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.4273369,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"220","last_page":"225"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7987487316131592},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7595378160476685},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.5999970436096191},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5361546874046326},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.5337323546409607},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4596078395843506},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4365115761756897},{"id":"https://openalex.org/keywords/viseme","display_name":"Viseme","score":0.42714935541152954},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4229954779148102},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4176008105278015},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3781614601612091}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7987487316131592},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7595378160476685},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.5999970436096191},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5361546874046326},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5337323546409607},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4596078395843506},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4365115761756897},{"id":"https://openalex.org/C33767174","wikidata":"https://www.wikidata.org/wiki/Q371190","display_name":"Viseme","level":4,"score":0.42714935541152954},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4229954779148102},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4176008105278015},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3781614601612091},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3340555.3353745","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3340555.3353745","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Conference on Multimodal Interaction","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1905.06860","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.06860","pdf_url":"https://arxiv.org/pdf/1905.06860","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2945998549","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1905.06860v1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1905.06860","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1905.06860","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1905.06860","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.06860","pdf_url":"https://arxiv.org/pdf/1905.06860","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2945998549.pdf","grobid_xml":"https://content.openalex.org/works/W2945998549.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W63958146","https://openalex.org/W1524333225","https://openalex.org/W1569907127","https://openalex.org/W1588539311","https://openalex.org/W1972978214","https://openalex.org/W1974357398","https://openalex.org/W1976295221","https://openalex.org/W1992187125","https://openalex.org/W2000911139","https://openalex.org/W2004789217","https://openalex.org/W2024490110","https://openalex.org/W2070726616","https://openalex.org/W2104480821","https://openalex.org/W2105833168","https://openalex.org/W2110501904","https://openalex.org/W2114336453","https://openalex.org/W2120067677","https://openalex.org/W2120654454","https://openalex.org/W2127211243","https://openalex.org/W2128173845","https://openalex.org/W2129360799","https://openalex.org/W2131342762","https://openalex.org/W2147885303","https://openalex.org/W2158069733","https://openalex.org/W2162598851","https://openalex.org/W2289286917","https://openalex.org/W2295661697","https://openalex.org/W2515372520","https://openalex.org/W2624413595","https://openalex.org/W2725151104","https://openalex.org/W2737658251","https://openalex.org/W2738406145","https://openalex.org/W2739192055","https://openalex.org/W2762899171","https://openalex.org/W2790649793","https://openalex.org/W2796931171","https://openalex.org/W2804600264","https://openalex.org/W2895226286","https://openalex.org/W2964559396","https://openalex.org/W4285719527","https://openalex.org/W4309145835"],"related_works":["https://openalex.org/W2980562267","https://openalex.org/W2515372520","https://openalex.org/W2625027024","https://openalex.org/W2124460610","https://openalex.org/W2752520680","https://openalex.org/W2797032258","https://openalex.org/W2987026005","https://openalex.org/W2112348857","https://openalex.org/W111284038","https://openalex.org/W2017071914","https://openalex.org/W2589081144","https://openalex.org/W2077889979","https://openalex.org/W2064347532","https://openalex.org/W1520113716","https://openalex.org/W2142574745","https://openalex.org/W2790109708","https://openalex.org/W1528020106","https://openalex.org/W1809822655","https://openalex.org/W1503933356","https://openalex.org/W2900292050"],"abstract_inverted_index":{"Speech-driven":[0],"visual":[1,72,95,149],"speech":[2,7,64,73,96,150,160],"synthesis":[3,74,97,151],"involves":[4],"mapping":[5,20],"acoustic":[6,67,164],"features":[8],"to":[9,30,48,93,119],"the":[10,71,78,94,113,116,134,138,142,156,162],"corresponding":[11],"lip":[12],"animation":[13],"controls":[14],"for":[15,53,70],"a":[16,26,46,107,120],"face":[17],"model.":[18,123,145],"This":[19],"can":[21,152],"take":[22],"many":[23],"forms,":[24],"but":[25],"powerful":[27,157],"approach":[28],"is":[29,45,90],"use":[31],"deep":[32],"neural":[33],"networks":[34],"(DNNs).":[35],"The":[36,88,124],"lack":[37],"of":[38,84,102,115,159],"synchronized":[39,103],"audio,":[40],"video,":[41],"and":[42],"depth":[43],"data":[44],"limitation":[47],"reliably":[49],"train":[50,77],"DNNs,":[51],"especially":[52],"speaker-independent":[54],"models.":[55,165],"In":[56],"this":[57],"paper,":[58],"we":[59,111],"investigate":[60],"adapting":[61],"an":[62],"automatic":[63],"recognition":[65],"(ASR)":[66],"model":[68],"(AM)":[69],"problem.":[75],"We":[76,146],"ASR-AM":[79,89],"on":[80],"ten":[81],"thousand":[82],"hours":[83,101],"audio-only":[85],"transcribed":[86],"speech.":[87,105],"then":[91],"adapted":[92],"domain":[98],"using":[99,141],"ninety":[100],"audio-visual":[104],"Using":[106],"subjective":[108],"assessment":[109],"test,":[110],"compared":[112],"performance":[114],"AM-initialized":[117,135],"DNN":[118,136],"randomly":[121,143],"initialized":[122,144],"results":[125],"show":[126],"that":[127,148],"viewers":[128],"significantly":[129,153],"prefer":[130],"animations":[131],"generated":[132,140],"from":[133,155],"than":[137],"ones":[139],"conclude":[147],"benefit":[154],"representation":[158],"in":[161],"ASR":[163]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
