{"id":"https://openalex.org/W2293862876","doi":"https://doi.org/10.1109/icip.2015.7350911","title":"Regression based landmark estimation and multi-feature fusion for visual speech recognition","display_name":"Regression based landmark estimation and multi-feature fusion for visual speech recognition","publication_year":2015,"publication_date":"2015-09-01","ids":{"openalex":"https://openalex.org/W2293862876","doi":"https://doi.org/10.1109/icip.2015.7350911","mag":"2293862876"},"language":"en","primary_location":{"id":"doi:10.1109/icip.2015.7350911","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icip.2015.7350911","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Image Processing (ICIP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100410326","display_name":"Hong Liu","orcid":"https://orcid.org/0000-0002-7498-6541"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Liu","raw_affiliation_strings":["Key Laboratory of Machine Perception (Ministry of Education), Peking University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Machine Perception (Ministry of Education), Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768034","display_name":"Xuewu Zhang","orcid":"https://orcid.org/0000-0002-0265-3967"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuewu Zhang","raw_affiliation_strings":["Key Laboratory of Machine Perception (Ministry of Education), Peking University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Machine Perception (Ministry of Education), Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101459282","display_name":"Pingping Wu","orcid":"https://orcid.org/0000-0002-7028-4200"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pingping Wu","raw_affiliation_strings":["Key Laboratory of Machine Perception (Ministry of Education), Peking University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Machine Perception (Ministry of Education), Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.5798,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.70020132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"2004","issue":null,"first_page":"808","last_page":"812"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9794999957084656,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7942478656768799},{"id":"https://openalex.org/keywords/landmark","display_name":"Landmark","score":0.7505193948745728},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7015361785888672},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6447226405143738},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6289699673652649},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5458486676216125},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.49874448776245117},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.44344931840896606},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.43100079894065857},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.42418766021728516},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3616885542869568}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7942478656768799},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.7505193948745728},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7015361785888672},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6447226405143738},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6289699673652649},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5458486676216125},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.49874448776245117},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.44344931840896606},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.43100079894065857},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.42418766021728516},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3616885542869568},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icip.2015.7350911","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icip.2015.7350911","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Image Processing (ICIP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W12796654","https://openalex.org/W1531976713","https://openalex.org/W1576762698","https://openalex.org/W1796263212","https://openalex.org/W1987454298","https://openalex.org/W1990937109","https://openalex.org/W2023537839","https://openalex.org/W2051676197","https://openalex.org/W2060510034","https://openalex.org/W2108033344","https://openalex.org/W2111124271","https://openalex.org/W2111372597","https://openalex.org/W2113814270","https://openalex.org/W2121486117","https://openalex.org/W2136155248","https://openalex.org/W2138527036","https://openalex.org/W2152826865","https://openalex.org/W2156660920","https://openalex.org/W2157827878","https://openalex.org/W2159624360","https://openalex.org/W2164598857","https://openalex.org/W2487087946","https://openalex.org/W6600520985","https://openalex.org/W6631751486","https://openalex.org/W6638488279"],"related_works":["https://openalex.org/W2056853153","https://openalex.org/W2057559274","https://openalex.org/W2005087563","https://openalex.org/W2378111931","https://openalex.org/W4243161226","https://openalex.org/W2950647290","https://openalex.org/W2620829895","https://openalex.org/W2356918560","https://openalex.org/W1968481813","https://openalex.org/W1996690921"],"abstract_inverted_index":{"Visual":[0],"speech":[1,13,35],"recognition":[2,14],"also":[3,92,111],"known":[4],"as":[5],"lipreading":[6,44],"can":[7,91],"improve":[8],"robustness":[9],"of":[10,28,65,98,147],"automatic":[11,43],"acoustic":[12],"especially":[15],"under":[16],"noisy":[17],"environments.":[18],"However,":[19],"it":[20,90],"remains":[21],"a":[22,48,63,80],"challenging":[23],"topic":[24],"considering":[25],"the":[26,60,96,99,103,115,119,148],"variety":[27],"speaking":[29],"characteristics":[30],"and":[31,53,107,137,144],"confusion":[32],"between":[33],"visual":[34,55],"features.":[36],"In":[37],"this":[38],"paper,":[39],"we":[40],"propose":[41],"an":[42],"method":[45,52,64,127],"by":[46],"using":[47],"new":[49],"lip":[50,75,116],"tracking":[51],"multiple":[54],"information":[56],"fusion":[57],"to":[58,95,113],"tackle":[59],"problem.":[61],"First,":[62],"face":[66],"landmark":[67],"estimation":[68],"based":[69,77],"on":[70,78,130],"regression":[71],"is":[72,86,123,128],"employed":[73],"for":[74],"detection,":[76],"which":[79],"geometric-based":[81],"shape":[82],"invariant":[83],"feature":[84],"(SIF)":[85],"put":[87],"forward.":[88],"Moreover,":[89],"be":[93],"applied":[94],"removal":[97],"non-speaking":[100],"utterance.":[101],"Then":[102],"motion":[104],"interchange":[105],"patterns":[106],"spatial-temporal":[108],"descriptors":[109],"are":[110],"adopted":[112],"describe":[114],"information,":[117],"where":[118],"Bayes":[120],"combination":[121],"strategy":[122],"applied.":[124],"The":[125],"proposed":[126,149],"explored":[129],"three":[131],"benchmark":[132],"data":[133],"sets:":[134],"Avletters2,":[135],"OuluVS":[136],"PKUVS.":[138],"Experimental":[139],"results":[140,143],"demonstrate":[141],"promising":[142],"show":[145],"effectiveness":[146],"approach.":[150]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
