{"id":"https://openalex.org/W2064347532","doi":"https://doi.org/10.1155/s1110865702206162","title":"Audio-Visual Speech Recognition Using MPEG-4 Compliant Visual Features","display_name":"Audio-Visual Speech Recognition Using MPEG-4 Compliant Visual Features","publication_year":2002,"publication_date":"2002-11-28","ids":{"openalex":"https://openalex.org/W2064347532","doi":"https://doi.org/10.1155/s1110865702206162","mag":"2064347532"},"language":"en","primary_location":{"id":"doi:10.1155/s1110865702206162","is_oa":true,"landing_page_url":"https://doi.org/10.1155/s1110865702206162","pdf_url":"https://asp-eurasipjournals.springeropen.com/counter/pdf/10.1155/S1110865702206162","source":{"id":"https://openalex.org/S35920007","display_name":"EURASIP Journal on Advances in Signal Processing","issn_l":"1687-6172","issn":["1687-6172","1687-6180"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Advances in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://asp-eurasipjournals.springeropen.com/counter/pdf/10.1155/S1110865702206162","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081890177","display_name":"Petar Aleksic","orcid":null},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Petar S. Aleksic","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northwestern University, 2145 North Sheridan Road, Evanston, IL, 60208-3118, USA","[Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northwestern University, 2145 North Sheridan Road, Evanston, IL, 60208-3118, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"[Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL]","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078726591","display_name":"J.J. Williams","orcid":null},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jay J. Williams","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northwestern University, 2145 North Sheridan Road, Evanston, IL, 60208-3118, USA","[Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northwestern University, 2145 North Sheridan Road, Evanston, IL, 60208-3118, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"[Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL]","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000773370","display_name":"Zhilin Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhilin Wu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northwestern University, 2145 North Sheridan Road, Evanston, IL, 60208-3118, USA","[Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northwestern University, 2145 North Sheridan Road, Evanston, IL, 60208-3118, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"[Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL]","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048650003","display_name":"Aggelos K. Katsaggelos","orcid":"https://orcid.org/0000-0003-4554-0070"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aggelos K. Katsaggelos","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northwestern University, 2145 North Sheridan Road, Evanston, IL, 60208-3118, USA","[Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northwestern University, 2145 North Sheridan Road, Evanston, IL, 60208-3118, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"[Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL]","institution_ids":["https://openalex.org/I111979921"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1140,"currency":"GBP","value_usd":1398},"apc_paid":{"value":1140,"currency":"GBP","value_usd":1398},"fwci":3.7614,"has_fulltext":true,"cited_by_count":58,"citation_normalized_percentile":{"value":0.94087203,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"2002","issue":"11","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.8280562162399292},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8042819499969482},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.7076683044433594},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.6312043070793152},{"id":"https://openalex.org/keywords/speechreading","display_name":"Speechreading","score":0.5415066480636597},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5131919980049133},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.5011646747589111},{"id":"https://openalex.org/keywords/word-recognition","display_name":"Word recognition","score":0.43049874901771545},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38664108514785767},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.24682879447937012}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.8280562162399292},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8042819499969482},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.7076683044433594},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.6312043070793152},{"id":"https://openalex.org/C2910309083","wikidata":"https://www.wikidata.org/wiki/Q1069953","display_name":"Speechreading","level":2,"score":0.5415066480636597},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5131919980049133},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5011646747589111},{"id":"https://openalex.org/C150856459","wikidata":"https://www.wikidata.org/wiki/Q8034367","display_name":"Word recognition","level":3,"score":0.43049874901771545},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38664108514785767},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.24682879447937012},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1155/s1110865702206162","is_oa":true,"landing_page_url":"https://doi.org/10.1155/s1110865702206162","pdf_url":"https://asp-eurasipjournals.springeropen.com/counter/pdf/10.1155/S1110865702206162","source":{"id":"https://openalex.org/S35920007","display_name":"EURASIP Journal on Advances in Signal Processing","issn_l":"1687-6172","issn":["1687-6172","1687-6180"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Advances in Signal Processing","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:80a40f4309ac4e188b38658ab49a002f","is_oa":true,"landing_page_url":"https://doaj.org/article/80a40f4309ac4e188b38658ab49a002f","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"EURASIP Journal on Advances in Signal Processing, Vol 2002, Iss 11, p 150948 (2002)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1155/s1110865702206162","is_oa":true,"landing_page_url":"https://doi.org/10.1155/s1110865702206162","pdf_url":"https://asp-eurasipjournals.springeropen.com/counter/pdf/10.1155/S1110865702206162","source":{"id":"https://openalex.org/S35920007","display_name":"EURASIP Journal on Advances in Signal Processing","issn_l":"1687-6172","issn":["1687-6172","1687-6180"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Advances in Signal Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6899999976158142,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2064347532.pdf","grobid_xml":"https://content.openalex.org/works/W2064347532.grobid-xml"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W22517275","https://openalex.org/W37491457","https://openalex.org/W177229737","https://openalex.org/W205850768","https://openalex.org/W586860715","https://openalex.org/W1560013842","https://openalex.org/W1563256181","https://openalex.org/W1869491686","https://openalex.org/W1929897159","https://openalex.org/W1964178275","https://openalex.org/W2021279213","https://openalex.org/W2033895145","https://openalex.org/W2072083949","https://openalex.org/W2080921589","https://openalex.org/W2096651761","https://openalex.org/W2102024456","https://openalex.org/W2102863932","https://openalex.org/W2104095591","https://openalex.org/W2106137268","https://openalex.org/W2106160885","https://openalex.org/W2107639756","https://openalex.org/W2121486117","https://openalex.org/W2125848778","https://openalex.org/W2135212327","https://openalex.org/W2145803225","https://openalex.org/W2149310520","https://openalex.org/W2150060382","https://openalex.org/W2151043030","https://openalex.org/W2157190406","https://openalex.org/W2162837059","https://openalex.org/W2163680580","https://openalex.org/W2340480757","https://openalex.org/W2413021556","https://openalex.org/W3099202502","https://openalex.org/W3169507310","https://openalex.org/W4231991966","https://openalex.org/W4388277543","https://openalex.org/W6670794517","https://openalex.org/W6675770446","https://openalex.org/W6785273600"],"related_works":["https://openalex.org/W1980930823","https://openalex.org/W2122866500","https://openalex.org/W2148167314","https://openalex.org/W2038107365","https://openalex.org/W2121652828","https://openalex.org/W2157598242","https://openalex.org/W3033124456","https://openalex.org/W2620660273","https://openalex.org/W22517275","https://openalex.org/W2951672008"],"abstract_inverted_index":{"We":[0,48],"describe":[1,50],"an":[2],"audio-visual":[3,108],"automatic":[4,54,109],"continuous":[5],"speech":[6,12,110,143,181,200],"recognition":[7,13,111,144,182,201],"system,":[8,128],"which":[9,65],"significantly":[10],"improves":[11],"performance":[14],"over":[15],"a":[16,51,136],"wide":[17],"range":[18],"of":[19,46,91],"acoustic":[20],"noise":[21,161],"levels,":[22],"as":[23,25,103],"well":[24],"under":[26,203],"clean":[27,150,204],"audio":[28,130,151,154,205],"conditions.":[29,206],"The":[30,75,146,165],"system":[31,167],"utilizes":[32],"facial":[33],"animation":[34],"parameters":[35],"(FAPs)":[36],"supported":[37],"by":[38,157,174,195],"the":[39,43,83,89,92,97,107,126,169],"MPEG-4":[40],"standard":[41],"for":[42],"visual":[44,63,93,104,132],"representation":[45],"speech.":[47],"also":[49],"robust":[52],"and":[53,96,116,131,134,153,194],"algorithm":[55],"we":[56],"have":[57],"developed":[58],"to":[59,87,124,176,179,198],"extract":[60],"FAPs":[61,84],"from":[62],"data,":[64],"does":[66],"not":[67],"require":[68],"hand":[69],"labeling":[70],"or":[71],"extensive":[72],"training":[73],"procedures.":[74],"principal":[76],"component":[77],"analysis":[78],"(PCA)":[79],"was":[80],"performed":[81,148],"on":[82],"in":[85,106],"order":[86],"decrease":[88],"dimensionality":[90],"feature":[94],"vectors,":[95],"derived":[98],"projection":[99],"weights":[100],"were":[101,122],"used":[102,123],"features":[105],"(ASR)":[112],"experiments.":[113,145],"Both":[114],"single-stream":[115],"multistream":[117],"hidden":[118],"Markov":[119],"models":[120],"(HMMs)":[121],"model":[125],"ASR":[127],"integrate":[129],"information,":[133],"perform":[135],"relatively":[137,178,197],"large":[138],"vocabulary":[139],"(approximately":[140],"1000":[141],"words)":[142],"experiments":[147],"use":[149],"data":[152,155],"corrupted":[156],"stationary":[158],"white":[159,191],"Gaussian":[160,192],"at":[162,184],"various":[163,185],"SNRs.":[164],"proposed":[166],"reduces":[168],"word":[170],"error":[171],"rate":[172],"(WER)":[173],"20%":[175],"23%":[177],"audio-only":[180,199],"WERs,":[183],"SNRs":[186],"(0\u201330":[187],"dB)":[188],"with":[189],"additive":[190],"noise,":[193],"19%":[196],"WER":[202]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
