{"id":"https://openalex.org/W2135212327","doi":"https://doi.org/10.1109/tnn.2002.1021891","title":"An HMM-based speech-to-video synthesizer","display_name":"An HMM-based speech-to-video synthesizer","publication_year":2002,"publication_date":"2002-07-01","ids":{"openalex":"https://openalex.org/W2135212327","doi":"https://doi.org/10.1109/tnn.2002.1021891","mag":"2135212327","pmid":"https://pubmed.ncbi.nlm.nih.gov/18244486"},"language":"en","primary_location":{"id":"doi:10.1109/tnn.2002.1021891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnn.2002.1021891","pdf_url":null,"source":{"id":"https://openalex.org/S42080949","display_name":"IEEE Transactions on Neural Networks","issn_l":"1045-9227","issn":["1045-9227","1941-0093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078726591","display_name":"J.J. Williams","orcid":null},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J.J. Williams","raw_affiliation_strings":["Dept. of Electr. and Comput. Eng., Northwestern Univ., Evanston, IL, USA","Dept. of Electr. & Comput. Eng., Northwestern Univ., Evanston, IL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of Electr. and Comput. Eng., Northwestern Univ., Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"Dept. of Electr. & Comput. Eng., Northwestern Univ., Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048650003","display_name":"Aggelos K. Katsaggelos","orcid":"https://orcid.org/0000-0003-4554-0070"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"A.K. Katsaggelos","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA","Dept. of Electr. & Comput. Eng., Northwestern Univ., Evanston, IL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"Dept. of Electr. & Comput. Eng., Northwestern Univ., Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.9188,"has_fulltext":false,"cited_by_count":32,"citation_normalized_percentile":{"value":0.96129032,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"13","issue":"4","first_page":"900","last_page":"915"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speechreading","display_name":"Speechreading","score":0.9159221649169922},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.8517690896987915},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8209131956100464},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7685011625289917},{"id":"https://openalex.org/keywords/telephony","display_name":"Telephony","score":0.4746958613395691},{"id":"https://openalex.org/keywords/narrowband","display_name":"Narrowband","score":0.44648081064224243},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4334617257118225},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4326789677143097},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4158661961555481},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35122328996658325}],"concepts":[{"id":"https://openalex.org/C2910309083","wikidata":"https://www.wikidata.org/wiki/Q1069953","display_name":"Speechreading","level":2,"score":0.9159221649169922},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.8517690896987915},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8209131956100464},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7685011625289917},{"id":"https://openalex.org/C195358072","wikidata":"https://www.wikidata.org/wiki/Q944584","display_name":"Telephony","level":2,"score":0.4746958613395691},{"id":"https://openalex.org/C2776096036","wikidata":"https://www.wikidata.org/wiki/Q1140483","display_name":"Narrowband","level":2,"score":0.44648081064224243},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4334617257118225},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4326789677143097},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4158661961555481},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35122328996658325},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tnn.2002.1021891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnn.2002.1021891","pdf_url":null,"source":{"id":"https://openalex.org/S42080949","display_name":"IEEE Transactions on Neural Networks","issn_l":"1045-9227","issn":["1045-9227","1941-0093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks","raw_type":"journal-article"},{"id":"pmid:18244486","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/18244486","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks","raw_type":null},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.19.8378","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8378","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ivpl.ece.northwestern.edu/Publications/Journals/2002/jay_akk_nn.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5699999928474426,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W90651302","https://openalex.org/W160985744","https://openalex.org/W1505362415","https://openalex.org/W1551013612","https://openalex.org/W1560013842","https://openalex.org/W1935012542","https://openalex.org/W1971950593","https://openalex.org/W1975270274","https://openalex.org/W1976592770","https://openalex.org/W1992227954","https://openalex.org/W2002591263","https://openalex.org/W2013699441","https://openalex.org/W2014621385","https://openalex.org/W2021279213","https://openalex.org/W2021539203","https://openalex.org/W2042389749","https://openalex.org/W2046909118","https://openalex.org/W2047618352","https://openalex.org/W2055997173","https://openalex.org/W2072083949","https://openalex.org/W2080416463","https://openalex.org/W2087964571","https://openalex.org/W2089551028","https://openalex.org/W2091930743","https://openalex.org/W2111649735","https://openalex.org/W2118901858","https://openalex.org/W2120157855","https://openalex.org/W2124174353","https://openalex.org/W2133826808","https://openalex.org/W2147885303","https://openalex.org/W2153664791","https://openalex.org/W2154920538","https://openalex.org/W2169321162","https://openalex.org/W2271670089","https://openalex.org/W2397526523","https://openalex.org/W2413021556","https://openalex.org/W4245419619","https://openalex.org/W4396560572","https://openalex.org/W6603731878","https://openalex.org/W6606539960","https://openalex.org/W6676585450","https://openalex.org/W6694104871"],"related_works":["https://openalex.org/W2164147372","https://openalex.org/W2550171623","https://openalex.org/W4253660971","https://openalex.org/W1480529103","https://openalex.org/W1909292483","https://openalex.org/W1428730622","https://openalex.org/W1658560081","https://openalex.org/W2146616055","https://openalex.org/W2121531770","https://openalex.org/W2169759054"],"abstract_inverted_index":{"Emerging":[0],"broadband":[1],"communication":[2],"systems":[3,39],"promise":[4],"a":[5,80,129],"future":[6],"of":[7,13,26,49,94,102,114,125,170],"multimedia":[8],"telephony,":[9],"e.g.":[10],"the":[11,24,28,47,55,62,69,92,100,103,111,115,158,164,168,186,207],"addition":[12],"visual":[14,51,85,143,161],"information":[15,30],"to":[16,22,96,137,174,197],"telephone":[17],"conversations.":[18],"It":[19],"is":[20,66,128,135],"useful":[21,31],"consider":[23],"problem":[25,48,98],"generating":[27],"critical":[29],"for":[32,41,75,119,145,157],"speechreading,":[33],"based":[34],"on":[35,46],"existing":[36],"narrowband":[37],"communications":[38],"used":[40],"speech.":[42],"This":[43,77,148],"paper":[44,78,127],"focuses":[45],"synthesizing":[50],"articulatory":[52,71],"movements":[53,72],"given":[54],"acoustic":[56,63,141,159],"speech":[57,64,86,212],"signal.":[58],"In":[59],"this":[60,97,126],"application,":[61],"signal":[65],"analyzed":[67],"and":[68,110,142,160],"corresponding":[70],"are":[73,99],"synthesized":[74],"speechreading.":[76],"describes":[79],"hidden":[81],"Markov":[82],"model":[83,133,149,155,166,209],"(HMM)-based":[84],"synthesizer.":[87],"The":[88,122],"key":[89,108],"elements":[90],"in":[91,153],"application":[93],"HMMs":[95,144],"decomposition":[101],"overall":[104],"modeling":[105,177],"task":[106],"into":[107],"stages":[109],"judicious":[112],"determination":[113],"observation":[116],"vector's":[117],"components":[118],"each":[120],"stage.":[121],"main":[123],"contribution":[124],"novel":[130],"correlation":[131],"HMM":[132],"that":[134,185,206],"able":[136],"integrate":[138],"independently":[139],"trained":[140],"speech-to-visual":[146],"synthesis.":[147],"allows":[150],"increased":[151],"flexibility":[152],"choosing":[154],"topologies":[156],"HMMs.":[162],"Moreover":[163],"propose":[165,187],"reduces":[167],"amount":[169],"training":[171],"data":[172],"compared":[173,196],"early":[175],"integration":[176],"techniques.":[178],"Results":[179],"from":[180],"objective":[181],"experiments":[182],"analysis":[183],"show":[184],"approach":[188],"can":[189,210],"reduce":[190],"time":[191],"alignment":[192],"errors":[193],"by":[194],"37.4%":[195],"conventional":[198],"temporal":[199],"scaling":[200],"method.":[201],"Furthermore,":[202],"subjective":[203],"results":[204],"indicated":[205],"purpose":[208],"increase":[211],"understanding.":[213]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
