{"id":"https://openalex.org/W2285716245","doi":"https://doi.org/10.1109/tmm.2016.2520091","title":"A Novel Lip Descriptor for Audio-Visual Keyword Spotting Based on Adaptive Decision Fusion","display_name":"A Novel Lip Descriptor for Audio-Visual Keyword Spotting Based on Adaptive Decision Fusion","publication_year":2016,"publication_date":"2016-01-21","ids":{"openalex":"https://openalex.org/W2285716245","doi":"https://doi.org/10.1109/tmm.2016.2520091","mag":"2285716245"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2016.2520091","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2016.2520091","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-02535026","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101459282","display_name":"Pingping Wu","orcid":"https://orcid.org/0000-0002-7028-4200"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pingping Wu","raw_affiliation_strings":["Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University, Beijing, China","Key Laboratory of Machine Perception (MOE) (Peking University Department of Machine Intelligence School of Electronics Engineering and Computer Science Peking University, Beijing P.R. China 100871 - China)"],"affiliations":[{"raw_affiliation_string":"Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Key Laboratory of Machine Perception (MOE) (Peking University Department of Machine Intelligence School of Electronics Engineering and Computer Science Peking University, Beijing P.R. China 100871 - China)","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100410286","display_name":"Hong Liu","orcid":"https://orcid.org/0000-0002-0896-8409"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Liu","raw_affiliation_strings":["Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University, Beijing, China","Key Laboratory of Machine Perception (MOE) (Peking University Department of Machine Intelligence School of Electronics Engineering and Computer Science Peking University, Beijing P.R. China 100871 - China)"],"affiliations":[{"raw_affiliation_string":"Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Key Laboratory of Machine Perception (MOE) (Peking University Department of Machine Intelligence School of Electronics Engineering and Computer Science Peking University, Beijing P.R. China 100871 - China)","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100358313","display_name":"Xiaofei Li","orcid":"https://orcid.org/0000-0003-0393-9905"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I4210101348","display_name":"Centre Inria de l'Universit\u00e9 Grenoble Alpes","ror":"https://ror.org/00n8d6z93","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210101348"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Xiaofei Li","raw_affiliation_strings":["PERCEPTION Team, INRIA Grenoble Rh\u00f4ne-Alpes, France","PERCEPTION - Interpretation and Modelling of Images and Videos (INRIA Rh\u00f4ne-Alpes 655 avenue de l'Europe 38330 Montbonnot, France - France)"],"affiliations":[{"raw_affiliation_string":"PERCEPTION Team, INRIA Grenoble Rh\u00f4ne-Alpes, France","institution_ids":["https://openalex.org/I1326498283"]},{"raw_affiliation_string":"PERCEPTION - Interpretation and Modelling of Images and Videos (INRIA Rh\u00f4ne-Alpes 655 avenue de l'Europe 38330 Montbonnot, France - France)","institution_ids":["https://openalex.org/I4210101348","https://openalex.org/I1326498283"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061734606","display_name":"Ting Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ting Fan","raw_affiliation_strings":["Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University, Beijing, China","Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University (China)"],"affiliations":[{"raw_affiliation_string":"Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University (China)","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100768034","display_name":"Xuewu Zhang","orcid":"https://orcid.org/0000-0002-0265-3967"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuewu Zhang","raw_affiliation_strings":["Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University, Beijing, China","Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University (China)"],"affiliations":[{"raw_affiliation_string":"Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Engineering Lab on Intelligent Perception for Internet of Things (ELIP), Shenzhen Graduate School, Peking University (China)","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101459282"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":3.7827,"has_fulltext":false,"cited_by_count":57,"citation_normalized_percentile":{"value":0.93944266,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"18","issue":"3","first_page":"326","last_page":"338"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13289","display_name":"Infant Health and Development","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/3611","display_name":"Pharmacy"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.8979778289794922},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.846520721912384},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6762621402740479},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6494606733322144},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6424539089202881},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5954217910766602},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5851309895515442},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.5152555108070374},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.513545036315918},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.46018868684768677},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.45469892024993896},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4538283944129944},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.08583724498748779}],"concepts":[{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.8979778289794922},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.846520721912384},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6762621402740479},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6494606733322144},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6424539089202881},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5954217910766602},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5851309895515442},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.5152555108070374},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.513545036315918},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.46018868684768677},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.45469892024993896},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4538283944129944},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.08583724498748779},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tmm.2016.2520091","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2016.2520091","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-02535026v1","is_oa":true,"landing_page_url":"https://hal.science/hal-02535026","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Multimedia, 2016, 18 (3), pp.326-338. &#x27E8;10.1109/TMM.2016.2520091&#x27E9;","raw_type":"Journal articles"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-02535026v1","is_oa":true,"landing_page_url":"https://hal.science/hal-02535026","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Multimedia, 2016, 18 (3), pp.326-338. &#x27E8;10.1109/TMM.2016.2520091&#x27E9;","raw_type":"Journal articles"},"sustainable_development_goals":[{"score":0.6299999952316284,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G1680957761","display_name":null,"funder_award_id":"60675025","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4028253781","display_name":null,"funder_award_id":"61340046","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5134116179","display_name":null,"funder_award_id":"60875050","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W142803501","https://openalex.org/W1520736967","https://openalex.org/W1529401398","https://openalex.org/W1530111162","https://openalex.org/W1664248204","https://openalex.org/W1796263212","https://openalex.org/W1855204404","https://openalex.org/W1976178451","https://openalex.org/W1986840384","https://openalex.org/W1987454298","https://openalex.org/W1990900815","https://openalex.org/W1990937109","https://openalex.org/W2020144989","https://openalex.org/W2022011789","https://openalex.org/W2024490110","https://openalex.org/W2027922120","https://openalex.org/W2029199293","https://openalex.org/W2029733119","https://openalex.org/W2032558548","https://openalex.org/W2036205336","https://openalex.org/W2038045014","https://openalex.org/W2046399019","https://openalex.org/W2051676197","https://openalex.org/W2060510034","https://openalex.org/W2081258893","https://openalex.org/W2084502373","https://openalex.org/W2096703356","https://openalex.org/W2097018403","https://openalex.org/W2100930826","https://openalex.org/W2104263160","https://openalex.org/W2111372597","https://openalex.org/W2122797512","https://openalex.org/W2124397839","https://openalex.org/W2125838338","https://openalex.org/W2127025755","https://openalex.org/W2128057924","https://openalex.org/W2134723767","https://openalex.org/W2134867751","https://openalex.org/W2136000821","https://openalex.org/W2136155248","https://openalex.org/W2138527036","https://openalex.org/W2145215282","https://openalex.org/W2150444217","https://openalex.org/W2151244813","https://openalex.org/W2155289555","https://openalex.org/W2159624360","https://openalex.org/W2162915993","https://openalex.org/W2170660557","https://openalex.org/W2322283567","https://openalex.org/W2405666970","https://openalex.org/W2926870023","https://openalex.org/W4285719527","https://openalex.org/W6605737493","https://openalex.org/W6631338322","https://openalex.org/W6638488279","https://openalex.org/W6638958570","https://openalex.org/W6661609219","https://openalex.org/W6671097792","https://openalex.org/W6674642818","https://openalex.org/W6679949672","https://openalex.org/W6682704335","https://openalex.org/W6713568955"],"related_works":["https://openalex.org/W2918559346","https://openalex.org/W2114097550","https://openalex.org/W4286904253","https://openalex.org/W3119978414","https://openalex.org/W2516975559","https://openalex.org/W3206647229","https://openalex.org/W1969408022","https://openalex.org/W2000885660","https://openalex.org/W2545741539","https://openalex.org/W1989658893"],"abstract_inverted_index":{"Keyword":[0],"spotting":[1,113,178],"remains":[2],"a":[3,51,67,90,109,140],"challenge":[4],"when":[5],"applied":[6],"to":[7,42,84,105,123,133,167,203],"real-world":[8],"environments":[9],"with":[10],"dramatically":[11],"changing":[12],"noise.":[13,31],"In":[14,82],"recent":[15],"studies,":[16],"audio-visual":[17,129,176],"integration":[18],"methods":[19],"have":[20],"demonstrated":[21],"superiorities":[22],"since":[23],"visual":[24,34,146],"speech":[25,35,130],"is":[26,55,72,94,119,198],"not":[27],"influenced":[28],"by":[29],"acoustic":[30,144],"However,":[32],"for":[33],"recognition,":[36],"individual":[37],"utterance":[38],"mannerisms":[39],"can":[40],"lead":[41],"confusion":[43],"and":[44,60,87,101,131,145,154,190],"false":[45],"recognition.":[46],"To":[47],"solve":[48],"this":[49,64],"problem,":[50],"novel":[52],"lip":[53,92,161],"descriptor":[54,162],"presented":[56],"involving":[57],"both":[58],"geometry-based":[59,70],"appearance-based":[61],"features":[62,71],"in":[63,121],"paper.":[65],"Specifically,":[66],"set":[68],"of":[69,128,170,201],"proposed":[73,120,160,175],"based":[74,115,181],"on":[75,116,150,182],"an":[76],"advanced":[77],"facial":[78],"landmark":[79],"localization":[80],"method.":[81],"order":[83,122],"obtain":[85],"robust":[86],"discriminative":[88],"representation,":[89],"spatiotemporal":[91],"feature":[93,104],"put":[95],"forward":[96],"concerning":[97],"similarities":[98],"among":[99],"textons":[100],"mapping":[102],"the":[103,125,151,159,168,171,174,187],"intra-class":[106],"subspace.":[107],"Moreover,":[108],"parallel":[110],"two-step":[111],"keyword":[112,177],"strategy":[114],"decision":[117],"fusion":[118,184],"make":[124],"best":[126],"use":[127],"adapt":[132],"diverse":[134],"noise":[135,188],"conditions.":[136,206],"Weights":[137],"generated":[138],"using":[139],"neural":[141],"network":[142],"combine":[143],"contributions.":[147],"Experimental":[148],"results":[149],"OuluVS":[152],"dataset":[153,156],"PKU-AV":[155],"demonstrate":[157],"that":[158],"shows":[163],"competitive":[164],"performance":[165,193],"compared":[166],"state":[169],"art.":[172],"Additionally,":[173],"(AV-KWS)":[179],"method":[180],"decision-level":[183],"significantly":[185],"improves":[186],"robustness":[189],"attains":[191],"better":[192],"than":[194],"feature-level":[195],"fusion,":[196],"which":[197],"also":[199],"capable":[200],"adapting":[202],"various":[204],"noisy":[205]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
