{"id":"https://openalex.org/W2171917108","doi":"https://doi.org/10.1109/icip.2002.1038187","title":"Audio-visual continuous speech recognition using MPEG-4 compliant visual features","display_name":"Audio-visual continuous speech recognition using MPEG-4 compliant visual features","publication_year":2003,"publication_date":"2003-06-25","ids":{"openalex":"https://openalex.org/W2171917108","doi":"https://doi.org/10.1109/icip.2002.1038187","mag":"2171917108"},"language":"en","primary_location":{"id":"doi:10.1109/icip.2002.1038187","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icip.2002.1038187","pdf_url":null,"source":{"id":"https://openalex.org/S4210223844","display_name":"Proceedings - International Conference on Image Processing","issn_l":"1522-4880","issn":["1522-4880","2381-8549"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. International Conference on Image Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081890177","display_name":"Petar Aleksic","orcid":null},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"P.S. Aleksic","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078726591","display_name":"J.J. Williams","orcid":null},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J.J. Williams","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000773370","display_name":"Zhilin Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhilin Wu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048650003","display_name":"Aggelos K. Katsaggelos","orcid":"https://orcid.org/0000-0003-4554-0070"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"A.K. Katsaggelos","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5081890177"],"corresponding_institution_ids":["https://openalex.org/I111979921"],"apc_list":null,"apc_paid":null,"fwci":1.1714,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.81136604,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"1","issue":null,"first_page":"I","last_page":"960"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8306326270103455},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.77979576587677},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7339726090431213},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.5756075978279114},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5026383399963379},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4907861649990082},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.48708227276802063},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4201367199420929},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36070454120635986},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.3572120666503906},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.22680503129959106}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8306326270103455},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.77979576587677},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7339726090431213},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.5756075978279114},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5026383399963379},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4907861649990082},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.48708227276802063},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4201367199420929},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36070454120635986},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.3572120666503906},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.22680503129959106},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icip.2002.1038187","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icip.2002.1038187","pdf_url":null,"source":{"id":"https://openalex.org/S4210223844","display_name":"Proceedings - International Conference on Image Processing","issn_l":"1522-4880","issn":["1522-4880","2381-8549"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. International Conference on Image Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5299999713897705,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1869491686","https://openalex.org/W2064347532","https://openalex.org/W2102024456","https://openalex.org/W2104095591","https://openalex.org/W2106160885","https://openalex.org/W2121486117","https://openalex.org/W2157190406","https://openalex.org/W2163680580","https://openalex.org/W2487271655","https://openalex.org/W3099202502","https://openalex.org/W3169507310","https://openalex.org/W6675878783","https://openalex.org/W6785273600"],"related_works":["https://openalex.org/W2150890698","https://openalex.org/W4245698648","https://openalex.org/W2401394187","https://openalex.org/W2405257913","https://openalex.org/W3133710586","https://openalex.org/W2125964738","https://openalex.org/W2098529290","https://openalex.org/W2026402306","https://openalex.org/W2121652828","https://openalex.org/W4385611764"],"abstract_inverted_index":{"We":[0,26],"utilize":[1],"facial":[2],"animation":[3],"parameters":[4],"(FAPs),":[5],"supported":[6],"by":[7,90,108],"the":[8,12,85,112],"MPEG-4":[9],"standard":[10],"for":[11,33],"visual":[13,38,60],"representation":[14],"of":[15,35],"speech,":[16],"in":[17],"order":[18],"to":[19,56,92,95,111],"improve":[20],"automatic":[21,31],"speech":[22],"recognition":[23],"(ASR)":[24],"significantly.":[25],"describe":[27],"a":[28,74],"robust":[29],"and":[30,59,69,107],"algorithm":[32],"extraction":[34],"FAPs":[36],"from":[37],"data":[39],"that":[40],"requires":[41],"no":[42],"hand":[43],"labeling":[44],"or":[45],"extensive":[46],"training":[47],"procedures.":[48],"Multi-stream":[49],"hidden":[50],"Markov":[51],"models":[52],"(HMM)":[53],"are":[54,64],"used":[55],"integrate":[57],"audio":[58,71,118],"information.":[61],"ASR":[62,97,114],"experiments":[63],"performed":[65],"under":[66,116],"both":[67],"clean":[68,117],"noisy":[70],"conditions":[72],"using":[73],"relatively":[75],"large":[76],"vocabulary":[77],"(approximately":[78],"1000":[79],"words).":[80],"The":[81],"proposed":[82],"system":[83],"reduces":[84],"word":[86],"error":[87],"rate":[88],"(WER)":[89],"20%":[91],"23%":[93],"relative":[94,110],"audio-only":[96,113],"WERs,":[98],"at":[99],"various":[100],"SNRs":[101],"with":[102],"additive":[103],"white":[104],"Gaussian":[105],"noise,":[106],"19%":[109],"WER":[115],"conditions.":[119]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
