{"id":"https://openalex.org/W2102006361","doi":"https://doi.org/10.1109/icip.2003.1247173","title":"Audio-visual speaker identification using coupled hidden Markov models","display_name":"Audio-visual speaker identification using coupled hidden Markov models","publication_year":2004,"publication_date":"2004-06-03","ids":{"openalex":"https://openalex.org/W2102006361","doi":"https://doi.org/10.1109/icip.2003.1247173","mag":"2102006361"},"language":"en","primary_location":{"id":"doi:10.1109/icip.2003.1247173","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icip.2003.1247173","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 2003 International Conference on Image Processing (Cat. No.03CH37429)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080699502","display_name":"Tieyan Fu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tieyan Fu","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, China","Dept. of Computer Science & Technology, Tsinghua University, Beijing, China#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Dept. of Computer Science & Technology, Tsinghua University, Beijing, China#TAB#","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078906994","display_name":"Xiao Xing Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiao Xing Liu","raw_affiliation_strings":["Microprocessor Research Laboratories, Intel Corporation, USA","FU TIEYAN                #N#                            LIU XIAOXING                #N#                            LIANG LUHONG                #N#                            PI XIAOBO                #N#                            NEFIAN ARA VICTOR"],"affiliations":[{"raw_affiliation_string":"Microprocessor Research Laboratories, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]},{"raw_affiliation_string":"FU TIEYAN                #N#                            LIU XIAOXING                #N#                            LIANG LUHONG                #N#                            PI XIAOBO                #N#                            NEFIAN ARA VICTOR","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086514660","display_name":"Lu Liang","orcid":"https://orcid.org/0000-0002-0938-0383"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lu Hong Liang","raw_affiliation_strings":["Microprocessor Research Laboratories, Intel Corporation, USA","FU TIEYAN                #N#                            LIU XIAOXING                #N#                            LIANG LUHONG                #N#                            PI XIAOBO                #N#                            NEFIAN ARA VICTOR"],"affiliations":[{"raw_affiliation_string":"Microprocessor Research Laboratories, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]},{"raw_affiliation_string":"FU TIEYAN                #N#                            LIU XIAOXING                #N#                            LIANG LUHONG                #N#                            PI XIAOBO                #N#                            NEFIAN ARA VICTOR","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027964715","display_name":"Xiaobo Pi","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaobo Pi","raw_affiliation_strings":["Microprocessor Research Laboratories, Intel Corporation, USA","FU TIEYAN                #N#                            LIU XIAOXING                #N#                            LIANG LUHONG                #N#                            PI XIAOBO                #N#                            NEFIAN ARA VICTOR"],"affiliations":[{"raw_affiliation_string":"Microprocessor Research Laboratories, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]},{"raw_affiliation_string":"FU TIEYAN                #N#                            LIU XIAOXING                #N#                            LIANG LUHONG                #N#                            PI XIAOBO                #N#                            NEFIAN ARA VICTOR","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113481101","display_name":"Ara Nefian","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"A.V. Nefian","raw_affiliation_strings":["Microprocessor Research Laboratories, Intel Corporation, USA","FU TIEYAN                #N#                            LIU XIAOXING                #N#                            LIANG LUHONG                #N#                            PI XIAOBO                #N#                            NEFIAN ARA VICTOR"],"affiliations":[{"raw_affiliation_string":"Microprocessor Research Laboratories, Intel Corporation, USA","institution_ids":["https://openalex.org/I1343180700"]},{"raw_affiliation_string":"FU TIEYAN                #N#                            LIU XIAOXING                #N#                            LIANG LUHONG                #N#                            PI XIAOBO                #N#                            NEFIAN ARA VICTOR","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080699502"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.6826,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.84500054,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"2","issue":null,"first_page":"III","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7791234850883484},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7704629302024841},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7679129242897034},{"id":"https://openalex.org/keywords/maximum-a-posteriori-estimation","display_name":"Maximum a posteriori estimation","score":0.5252073407173157},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46287229657173157},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.45484790205955505},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4542574882507324},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4539102017879486},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.43367859721183777},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.41710084676742554},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4127502143383026},{"id":"https://openalex.org/keywords/maximum-likelihood","display_name":"Maximum likelihood","score":0.18702757358551025},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08512479066848755}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7791234850883484},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7704629302024841},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7679129242897034},{"id":"https://openalex.org/C9810830","wikidata":"https://www.wikidata.org/wiki/Q635384","display_name":"Maximum a posteriori estimation","level":3,"score":0.5252073407173157},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46287229657173157},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.45484790205955505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4542574882507324},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4539102017879486},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.43367859721183777},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.41710084676742554},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4127502143383026},{"id":"https://openalex.org/C49781872","wikidata":"https://www.wikidata.org/wiki/Q1045555","display_name":"Maximum likelihood","level":2,"score":0.18702757358551025},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08512479066848755},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icip.2003.1247173","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icip.2003.1247173","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 2003 International Conference on Image Processing (Cat. No.03CH37429)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W13113107","https://openalex.org/W88081813","https://openalex.org/W93715315","https://openalex.org/W172154728","https://openalex.org/W1531846585","https://openalex.org/W1576996184","https://openalex.org/W1599408644","https://openalex.org/W1664547674","https://openalex.org/W1833616679","https://openalex.org/W1978380426","https://openalex.org/W1984530611","https://openalex.org/W1998627190","https://openalex.org/W2041823554","https://openalex.org/W2083792893","https://openalex.org/W2109207304","https://openalex.org/W2112173139","https://openalex.org/W2112348857","https://openalex.org/W2118977726","https://openalex.org/W2121486117","https://openalex.org/W2121753362","https://openalex.org/W2127615162","https://openalex.org/W2131341478","https://openalex.org/W2132999255","https://openalex.org/W2151457493","https://openalex.org/W2152239535","https://openalex.org/W2156034920","https://openalex.org/W2156267227","https://openalex.org/W2159513984","https://openalex.org/W2164450870","https://openalex.org/W2300138058","https://openalex.org/W3099202502","https://openalex.org/W6600554954","https://openalex.org/W6607016223","https://openalex.org/W6682396969","https://openalex.org/W6785273600"],"related_works":["https://openalex.org/W1595191759","https://openalex.org/W2206035908","https://openalex.org/W1493012537","https://openalex.org/W2010299594","https://openalex.org/W2162158162","https://openalex.org/W4247736853","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W4310979479","https://openalex.org/W2696990509"],"abstract_inverted_index":{"In":[0],"this":[1,90],"paper,":[2],"we":[3,113,125],"investigate":[4],"the":[5,8,15,26,29,42,46,49,75,87,94,147],"use":[6,78],"of":[7,17,28,35,48,63,79,89,149,158],"coupled":[9],"hidden":[10],"Markov":[11],"models":[12],"(CHMM)":[13],"for":[14,66,71],"task":[16],"audio-visual":[18],"text":[19],"dependent":[20,129],"speaker":[21,117,128,153],"identification.":[22],"Our":[23],"system":[24,83,145],"determines":[25],"identity":[27],"user":[30],"from":[31,41,163],"a":[32,61,111,116,127,133],"temporal":[33],"sequence":[34],"audio":[36,96],"and":[37,45,70,97,123],"visual":[38,98],"observations":[39],"obtained":[40],"acoustic":[43,159],"speech":[44],"shape":[47],"mouth,":[50],"respectively.":[51],"The":[52,77],"multi":[53],"modal":[54],"observation":[55],"sequences":[56],"are":[57],"then":[58,124],"modeled":[59],"using":[60,120,131],"set":[62],"CHMMs,":[64],"one":[65],"each":[67,72],"phoneme-viseme":[68],"pair":[69],"person":[73],"in":[74,81],"database.":[76],"CHMMs":[80],"our":[82,144],"is":[84],"justified":[85],"by":[86],"capacity":[88],"model":[91,119,130],"to":[92,165],"describe":[93],"natural":[95],"state":[99],"asynchrony":[100],"as":[101,103],"well":[102],"their":[104],"conditional":[105],"dependency":[106],"over":[107],"time.":[108],"To":[109],"train":[110,115],"CHMM":[112],"first":[114],"independent":[118],"expectation-maximization":[121],"(EM),":[122],"build":[126],"maximum":[132],"posteriori":[134],"(MAP)":[135],"training.":[136],"Experimental":[137],"results":[138],"on":[139],"XM2VTS":[140],"database":[141],"show":[142],"that":[143],"improves":[146],"accuracy":[148],"audio-only":[150],"or":[151],"video-only":[152],"identification":[154],"at":[155],"all":[156],"levels":[157],"signal-to-noise":[160],"ratio":[161],"(SNR)":[162],"0":[164],"30":[166],"dB.":[167]},"counts_by_year":[{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
