{"id":"https://openalex.org/W4297841644","doi":"https://doi.org/10.21437/interspeech.2022-10650","title":"Audio-Visual Wake Word Spotting in MISP2021 Challenge: Dataset Release and Deep Analysis","display_name":"Audio-Visual Wake Word Spotting in MISP2021 Challenge: Dataset Release and Deep Analysis","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4297841644","doi":"https://doi.org/10.21437/interspeech.2022-10650"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-10650","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10650","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103235094","display_name":"Hengshun Zhou","orcid":"https://orcid.org/0000-0001-7878-6531"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hengshun Zhou","raw_affiliation_strings":["University of Science and Technology of China, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412669","display_name":"Jun Du","orcid":"https://orcid.org/0000-0002-2171-9001"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jun Du","raw_affiliation_strings":["University of Science and Technology of China, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008518006","display_name":"Gongzhen Zou","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gongzhen Zou","raw_affiliation_strings":["University of Science and Technology of China, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005376611","display_name":"Zhaoxu Nian","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoxu Nian","raw_affiliation_strings":["University of Science and Technology of China, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066868860","display_name":"Chin\u2010Hui Lee","orcid":"https://orcid.org/0000-0002-1892-2551"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chin-Hui Lee","raw_affiliation_strings":["Georgia Institute of Technology, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079659476","display_name":"Sabato Marco Siniscalchi","orcid":"https://orcid.org/0000-0002-0770-0507"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I246010334","display_name":"Universit\u00e0 degli Studi di Enna Kore","ror":"https://ror.org/04vd28p53","country_code":"IT","type":"education","lineage":["https://openalex.org/I246010334"]}],"countries":["IT","US"],"is_corresponding":false,"raw_author_name":"Sabato Marco Siniscalchi","raw_affiliation_strings":["Kore University of Enna, Italy","Georgia Institute of Technology, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kore University of Enna, Italy","institution_ids":["https://openalex.org/I246010334"]},{"raw_affiliation_string":"Georgia Institute of Technology, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048860892","display_name":"Odette Scharenborg","orcid":"https://orcid.org/0000-0003-0693-8852"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Odette Scharenborg","raw_affiliation_strings":["Delft University of Technology, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Delft University of Technology, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056129529","display_name":"Jingdong Chen","orcid":"https://orcid.org/0000-0003-0083-9247"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingdong Chen","raw_affiliation_strings":["Northwestern Polytechnical University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021081307","display_name":"Shifu Xiong","orcid":"https://orcid.org/0000-0003-4759-147X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shifu Xiong","raw_affiliation_strings":["iFlytek, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"iFlytek, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100730084","display_name":"Jianqing Gao","orcid":"https://orcid.org/0000-0003-1052-7060"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jian-Qing Gao","raw_affiliation_strings":["iFlytek, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"iFlytek, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100412669"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":1.1416,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.8025974,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9728000164031982,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9553999900817871,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.9590793251991272},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.697886049747467},{"id":"https://openalex.org/keywords/wake","display_name":"Wake","score":0.6527441740036011},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6068673133850098},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5131374001502991},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4443078637123108},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4291527271270752},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11704951524734497},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10567286610603333}],"concepts":[{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.9590793251991272},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.697886049747467},{"id":"https://openalex.org/C48939323","wikidata":"https://www.wikidata.org/wiki/Q294879","display_name":"Wake","level":2,"score":0.6527441740036011},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6068673133850098},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5131374001502991},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4443078637123108},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4291527271270752},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11704951524734497},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10567286610603333},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2022-10650","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10650","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},{"id":"pmh:oai:iris.unipa.it:10447/641414","is_oa":false,"landing_page_url":"https://hdl.handle.net/10447/641414","pdf_url":null,"source":{"id":"https://openalex.org/S4306401065","display_name":"Nova Science Publishers (Nova Science Publishers, Inc.)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/bookPart"},{"id":"pmh:oai:tudelft.nl:uuid:74fc0816-0901-423d-b917-365574bf24a3","is_oa":false,"landing_page_url":"http://resolver.tudelft.nl/uuid:74fc0816-0901-423d-b917-365574bf24a3","pdf_url":null,"source":{"id":"https://openalex.org/S4306400906","display_name":"Research Repository (Delft University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98358874","host_organization_name":"Delft University of Technology","host_organization_lineage":["https://openalex.org/I98358874"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1713836045","display_name":null,"funder_award_id":"62171427","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2034940213","https://openalex.org/W2122797512","https://openalex.org/W2148613904","https://openalex.org/W2194775991","https://openalex.org/W2285716245","https://openalex.org/W2763188033","https://openalex.org/W2765407302","https://openalex.org/W2896422817","https://openalex.org/W2937769560","https://openalex.org/W2939412000","https://openalex.org/W2963197115","https://openalex.org/W2965193981","https://openalex.org/W3012505451","https://openalex.org/W3015118013","https://openalex.org/W3016218307","https://openalex.org/W3035682985","https://openalex.org/W3082089446","https://openalex.org/W3093383631","https://openalex.org/W3094791070","https://openalex.org/W3094793624","https://openalex.org/W3095212293","https://openalex.org/W3095735913","https://openalex.org/W3095949666","https://openalex.org/W3096662840","https://openalex.org/W3096976027","https://openalex.org/W3097519469","https://openalex.org/W3097878195","https://openalex.org/W3160056314","https://openalex.org/W3163237592","https://openalex.org/W3176579201","https://openalex.org/W3197949759","https://openalex.org/W4206567542","https://openalex.org/W4221141391","https://openalex.org/W4224213421","https://openalex.org/W4224933780"],"related_works":["https://openalex.org/W2757285599","https://openalex.org/W2376025146","https://openalex.org/W3184921334","https://openalex.org/W2033009170","https://openalex.org/W2028814537","https://openalex.org/W2058491579","https://openalex.org/W1483316057","https://openalex.org/W118429992","https://openalex.org/W4300154353","https://openalex.org/W2970166416"],"abstract_inverted_index":{"&lt;p&gt;In":[0],"this":[1],"paper,":[2],"we":[3,80],"describe":[4],"and":[5,27,34,38,44,52,76,101,119],"release":[6],"publicly":[7,45],"the":[8,16,53,65,82,107,127,133],"audio-visual":[9,98],"wake":[10],"word":[11],"spotting":[12],"(WWS)":[13],"database":[14,47,51],"in":[15,74],"MISP2021":[17],"Challenge,":[18],"which":[19,58],"covers":[20],"a":[21,42,61],"range":[22],"of":[23,25,97],"scenarios":[24],"audio":[26,118],"video":[28,120],"data":[29,84],"collected":[30],"by":[31],"near-,":[32],"mid-,":[33],"far-field":[35],"microphone":[36],"arrays,":[37],"cameras,":[39],"to":[40,64,105,112],"create":[41],"shared":[43],"available":[46],"for":[48,67,87],"WWS.":[49],"The":[50,123],"code":[54],"&lt;sup&gt;2&lt;/sup&gt;":[55],"are":[56],"released,":[57],"will":[59],"be":[60],"valuable":[62],"addition":[63],"community":[66],"promoting":[68],"WWS":[69,93],"research":[70],"using":[71],"multi-modality":[72],"information":[73,111,114],"realistic":[75],"complex":[77,141],"conditions.":[78],"Moreover,":[79],"investigated":[81],"different":[83,117],"augmentation":[85],"methods":[86],"single":[88],"modalities":[89],"on":[90,116],"an":[91],"end-to-end":[92],"network.":[94],"A":[95],"set":[96],"fusion":[99,128],"experiments":[100],"analysis":[102],"were":[103],"conducted":[104],"observe":[106],"assistance":[108],"from":[109],"visual":[110],"acoustic":[113],"based":[115],"field":[121],"configurations.":[122],"results":[124],"showed":[125],"that":[126],"system":[129],"generally":[130],"improves":[131],"over":[132],"single-modality":[134],"(audio-":[135],"or":[136],"video-only)":[137],"system,":[138],"especially":[139],"under":[140],"noisy":[142],"conditions.&lt;/p&gt;":[143]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-30T13:55:48.251075","created_date":"2025-10-10T00:00:00"}
