{"id":"https://openalex.org/W4224213421","doi":"https://doi.org/10.1109/icassp43922.2022.9746762","title":"Audio-Visual Wake Word Spotting System for MISP Challenge 2021","display_name":"Audio-Visual Wake Word Spotting System for MISP Challenge 2021","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4224213421","doi":"https://doi.org/10.1109/icassp43922.2022.9746762"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746762","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746762","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112870466","display_name":"Yanguang Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanguang Xu","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100429020","display_name":"Jianwei Sun","orcid":"https://orcid.org/0000-0002-2470-1077"},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwei Sun","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100462722","display_name":"Han Yang","orcid":"https://orcid.org/0000-0003-2782-7502"},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Han","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041886705","display_name":"Shuaijiang Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuaijiang Zhao","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061005945","display_name":"Chaoyang Mei","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaoyang Mei","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102944502","display_name":"Tingwei Guo","orcid":"https://orcid.org/0000-0002-9320-543X"},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tingwei Guo","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038813097","display_name":"Shuran Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuran Zhou","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052269761","display_name":"Chuandong Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuandong Xie","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101808317","display_name":"Wei Zou","orcid":"https://orcid.org/0000-0003-4307-5355"},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zou","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081173423","display_name":"Xiangang Li","orcid":"https://orcid.org/0000-0002-7810-1077"},"institutions":[{"id":"https://openalex.org/I4210128412","display_name":"Beike Biotechnology (China)","ror":"https://ror.org/03fd44j44","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangang Li","raw_affiliation_strings":["Beike,Beijing,China","Beike, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beike,Beijing,China","institution_ids":["https://openalex.org/I4210128412"]},{"raw_affiliation_string":"Beike, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5112870466"],"corresponding_institution_ids":["https://openalex.org/I4210128412"],"apc_list":null,"apc_paid":null,"fwci":1.1046,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.78894155,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8344467878341675},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.8305940628051758},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.591230571269989},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5829054117202759},{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.5199272036552429},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5135231614112854},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.5131112337112427},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4710274338722229},{"id":"https://openalex.org/keywords/beamforming","display_name":"Beamforming","score":0.46449828147888184},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.4636894762516022},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.3487620949745178},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.11122071743011475}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8344467878341675},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.8305940628051758},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.591230571269989},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5829054117202759},{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.5199272036552429},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5135231614112854},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.5131112337112427},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4710274338722229},{"id":"https://openalex.org/C54197355","wikidata":"https://www.wikidata.org/wiki/Q5782992","display_name":"Beamforming","level":2,"score":0.46449828147888184},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.4636894762516022},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.3487620949745178},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11122071743011475},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746762","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746762","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land","score":0.4300000071525574}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1987454298","https://openalex.org/W2096733369","https://openalex.org/W2167338739","https://openalex.org/W2285716245","https://openalex.org/W2889839708","https://openalex.org/W2936774411","https://openalex.org/W2963414149","https://openalex.org/W2987228815","https://openalex.org/W3007947150","https://openalex.org/W3016011581","https://openalex.org/W3033210410","https://openalex.org/W3082089446","https://openalex.org/W3160658686","https://openalex.org/W3162293946","https://openalex.org/W3162707322","https://openalex.org/W3182657421","https://openalex.org/W3196575740","https://openalex.org/W3196777808","https://openalex.org/W3197089327","https://openalex.org/W3197459407","https://openalex.org/W4224933780","https://openalex.org/W6749714907","https://openalex.org/W6769728370","https://openalex.org/W6779248606","https://openalex.org/W6782374147"],"related_works":["https://openalex.org/W2918559346","https://openalex.org/W3119978414","https://openalex.org/W2114097550","https://openalex.org/W2545741539","https://openalex.org/W2516975559","https://openalex.org/W3206647229","https://openalex.org/W4286904253","https://openalex.org/W2000885660","https://openalex.org/W1969408022","https://openalex.org/W1989658893"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"the":[3,10,37,46,67,86,89,102,141,145],"details":[4],"of":[5,13,24,40,53,92],"our":[6,119,159],"system":[7],"designed":[8],"for":[9,127],"Task":[11,25],"1":[12,26],"Multimodal":[14],"Information":[15],"Based":[16],"Speech":[17],"Processing":[18],"(MISP)":[19],"Challenge":[20],"2021.":[21],"The":[22,134],"purpose":[23],"is":[27,95,105,137],"to":[28,35,65,78,97,107,139,157],"leverage":[29],"both":[30],"audio":[31,109],"and":[32,60,110,113,132,143],"video":[33,87],"information":[34],"improve":[36,144],"environmental":[38],"robustness":[39],"far-field":[41,83],"wake":[42],"word":[43],"spotting.":[44],"In":[45],"proposed":[47,106],"system,":[48],"firstly,":[49],"we":[50],"take":[51],"advantage":[52],"speech":[54],"enhancement":[55],"algorithms":[56],"such":[57,129],"as":[58,130],"beamforming":[59],"weighted":[61],"prediction":[62],"error":[63],"(WPE)":[64],"address":[66],"multi-microphone":[68],"conversational":[69],"audio.":[70],"Secondly,":[71],"several":[72],"data":[73],"augmentation":[74],"techniques":[75],"are":[76,116,152],"applied":[77],"simulate":[79],"a":[80],"more":[81],"realistic":[82],"scenario.":[84],"For":[85],"information,":[88],"provided":[90],"region":[91],"interest":[93],"(ROI)":[94],"used":[96,138],"obtain":[98],"visual":[99,111],"representation.":[100],"Then":[101],"multi-layer":[103],"CNN":[104],"learn":[108],"representations,":[112],"these":[114],"representations":[115],"fed":[117],"into":[118],"two-branch":[120],"attention-based":[121],"net-work":[122],"which":[123],"can":[124],"be":[125],"employed":[126],"fusion,":[128],"transformer":[131],"conformer.":[133],"focal":[135],"loss":[136],"fine-tune":[140],"model":[142],"performance":[146],"significantly.":[147],"Finally,":[148],"multiple":[149],"trained":[150],"models":[151],"integrated":[153],"by":[154],"casting":[155],"vote":[156],"achieve":[158],"final":[160],"0.091":[161],"score.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
