{"id":"https://openalex.org/W2142550732","doi":"https://doi.org/10.21437/interspeech.2010-204","title":"Setup for acoustic-visual speech synthesis by concatenating bimodal units","display_name":"Setup for acoustic-visual speech synthesis by concatenating bimodal units","publication_year":2010,"publication_date":"2010-09-26","ids":{"openalex":"https://openalex.org/W2142550732","doi":"https://doi.org/10.21437/interspeech.2010-204","mag":"2142550732"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2010-204","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2010-204","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2010","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/inria-00526766","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030228191","display_name":"Asterios Toutios","orcid":"https://orcid.org/0000-0003-3193-2241"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Asterios Toutios","raw_affiliation_strings":["Analysis, perception and recognition of speech"],"affiliations":[{"raw_affiliation_string":"Analysis, perception and recognition of speech","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020473453","display_name":"Utpala Musti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Utpala Musti","raw_affiliation_strings":["Analysis, perception and recognition of speech"],"affiliations":[{"raw_affiliation_string":"Analysis, perception and recognition of speech","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044130608","display_name":"Slim Ouni","orcid":"https://orcid.org/0000-0001-5286-7368"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Slim Ouni","raw_affiliation_strings":["Analysis, perception and recognition of speech"],"affiliations":[{"raw_affiliation_string":"Analysis, perception and recognition of speech","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039712983","display_name":"Vincent Colotte","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vincent Colotte","raw_affiliation_strings":["Analysis, perception and recognition of speech"],"affiliations":[{"raw_affiliation_string":"Analysis, perception and recognition of speech","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046710569","display_name":"Brigitte Wrobel-Dautcourt","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brigitte Wrobel-Dautcourt","raw_affiliation_strings":["Visual Augmentation of Complex Environments"],"affiliations":[{"raw_affiliation_string":"Visual Augmentation of Complex Environments","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027151885","display_name":"Marie\u2010Odile Berger","orcid":"https://orcid.org/0000-0002-9371-1367"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marie-Odile Berger","raw_affiliation_strings":["Visual Augmentation of Complex Environments"],"affiliations":[{"raw_affiliation_string":"Visual Augmentation of Complex Environments","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5030228191"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2931,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82828448,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"486","last_page":"489"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9485999941825867,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8293097019195557},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6344711780548096},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.6243395805358887},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.6074098348617554},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.5836122035980225},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5093215703964233},{"id":"https://openalex.org/keywords/join","display_name":"Join (topology)","score":0.5059704184532166},{"id":"https://openalex.org/keywords/computer-facial-animation","display_name":"Computer facial animation","score":0.46089521050453186},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.45791593194007874},{"id":"https://openalex.org/keywords/computer-animation","display_name":"Computer animation","score":0.43218836188316345},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.41295480728149414},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35107988119125366},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.19553136825561523},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07473567128181458}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8293097019195557},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6344711780548096},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.6243395805358887},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.6074098348617554},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.5836122035980225},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5093215703964233},{"id":"https://openalex.org/C2776124973","wikidata":"https://www.wikidata.org/wiki/Q3183033","display_name":"Join (topology)","level":2,"score":0.5059704184532166},{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.46089521050453186},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.45791593194007874},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.43218836188316345},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.41295480728149414},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35107988119125366},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.19553136825561523},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07473567128181458},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2010-204","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2010-204","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2010","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.723.6269","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.723.6269","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://sipi.usc.edu/%7Etoutios/papers/toutios_interspeech2010.pdf","raw_type":"text"},{"id":"pmh:oai:HAL:inria-00526766v1","is_oa":true,"landing_page_url":"https://inria.hal.science/inria-00526766","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Interspeech 2010, ISCA, Sep 2010, Makuhari, Chiba, Japan. pp.486-489","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:inria-00526766v1","is_oa":true,"landing_page_url":"https://inria.hal.science/inria-00526766","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Interspeech 2010, ISCA, Sep 2010, Makuhari, Chiba, Japan. pp.486-489","raw_type":"Conference papers"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W8150067","https://openalex.org/W85054027","https://openalex.org/W1550599496","https://openalex.org/W1608978487","https://openalex.org/W2019163108","https://openalex.org/W2036948202","https://openalex.org/W2127211243","https://openalex.org/W2206512106","https://openalex.org/W2406132719","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W1544039745","https://openalex.org/W2121378366","https://openalex.org/W2999276620","https://openalex.org/W2532377291","https://openalex.org/W1976926596","https://openalex.org/W2989004599","https://openalex.org/W3094080214","https://openalex.org/W2535923857","https://openalex.org/W2156310872","https://openalex.org/W2356609371"],"abstract_inverted_index":{"This":[0,24],"paper":[1],"presents":[2],"preliminary":[3],"work":[4],"on":[5,71],"building":[6],"a":[7,17,47,84],"system":[8,116],"able":[9],"to":[10,64,114],"synthesize":[11],"concurrently":[12],"the":[13,21,54,91,94,98,103,115],"speech":[14,100,122],"signal":[15,101],"and":[16,39,58,74,102,112],"3D":[18],"animation":[19,105],"of":[20,56,93,107],"speaker\u2019s":[22],"face.":[23],"is":[25,44,69],"done":[26],"by":[27],"concatenating":[28],"bimodal":[29],"di-phone":[30],"units,":[31],"that":[32,35],"is,":[33],"units":[34],"comprise":[36],"both":[37,97],"acoustic":[38],"visual":[40,85],"information.":[41],"The":[42,50],"latter":[43],"acquired":[45],"using":[46],"stereovision":[48],"tech-nique.":[49],"proposed":[51],"method":[52],"addresses":[53],"problems":[55],"asyn-chrony":[57],"incoherence":[59],"inherent":[60],"in":[61],"classic":[62,72],"approaches":[63],"au-diovisual":[65],"synthesis.":[66],"Unit":[67],"selection":[68],"based":[70],"target":[73],"join":[75,86],"costs":[76],"from":[77],"acoustic-only":[78],"synthesis,":[79,123],"which":[80],"are":[81,106,117],"augmented":[82],"with":[83],"cost.":[87],"Preliminary":[88],"results":[89],"indicate":[90],"benefits":[92],"approach,":[95],"since":[96],"synthesized":[99],"face":[104],"good":[108],"quality.":[109],"Planned":[110],"improvements":[111],"enhancements":[113],"outlined.":[118],"Index":[119],"Terms:":[120],"audiovisual":[121],"talking":[124],"head,":[125],"bi-modal":[126],"unit":[127],"concatenation,":[128],"diphones":[129]},"counts_by_year":[{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
