{"id":"https://openalex.org/W4392908983","doi":"https://doi.org/10.1109/icassp48485.2024.10446042","title":"Efficient Personal Voice Activity Detection with Wake Word Reference Speech","display_name":"Efficient Personal Voice Activity Detection with Wake Word Reference Speech","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392908983","doi":"https://doi.org/10.1109/icassp48485.2024.10446042"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446042","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446042","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024016041","display_name":"Bang Zeng","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I4210159968","display_name":"Duke Kunshan University","ror":"https://ror.org/04sr5ys16","country_code":"CN","type":"education","lineage":["https://openalex.org/I170897317","https://openalex.org/I37461747","https://openalex.org/I4210159968"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bang Zeng","raw_affiliation_strings":["Wuhan University,School of Computer Science,Wuhan,China","School of Computer Science, Wuhan University, Wuhan, China","Suzhou Municipal Key Laboratory of Multimodal Intelligent Systems, Duke Kunshan University, Kunshan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,School of Computer Science,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"School of Computer Science, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"Suzhou Municipal Key Laboratory of Multimodal Intelligent Systems, Duke Kunshan University, Kunshan, China","institution_ids":["https://openalex.org/I4210159968"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090476096","display_name":"Ming Cheng","orcid":"https://orcid.org/0000-0002-3300-9787"},"institutions":[{"id":"https://openalex.org/I4210159968","display_name":"Duke Kunshan University","ror":"https://ror.org/04sr5ys16","country_code":"CN","type":"education","lineage":["https://openalex.org/I170897317","https://openalex.org/I37461747","https://openalex.org/I4210159968"]},{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Cheng","raw_affiliation_strings":["Wuhan University,School of Computer Science,Wuhan,China","Suzhou Municipal Key Laboratory of Multimodal Intelligent Systems, Duke Kunshan University, Kunshan, China","School of Computer Science, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,School of Computer Science,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"Suzhou Municipal Key Laboratory of Multimodal Intelligent Systems, Duke Kunshan University, Kunshan, China","institution_ids":["https://openalex.org/I4210159968"]},{"raw_affiliation_string":"School of Computer Science, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100571324","display_name":"Yao Tian","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159156","display_name":"System Equipment (China)","ror":"https://ror.org/04m2bcn74","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210159156"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Tian","raw_affiliation_strings":["OPPO,Data &#x0026; AI Engineering System,Beijing,China"],"affiliations":[{"raw_affiliation_string":"OPPO,Data &#x0026; AI Engineering System,Beijing,China","institution_ids":["https://openalex.org/I4210159156"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115053629","display_name":"Haifeng Liu","orcid":"https://orcid.org/0000-0003-3319-254X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haifeng Liu","raw_affiliation_strings":["University of Science and Technology of China,Hefei,China","University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei,China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100351449","display_name":"Ming Li","orcid":"https://orcid.org/0000-0002-6406-1983"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I4210159968","display_name":"Duke Kunshan University","ror":"https://ror.org/04sr5ys16","country_code":"CN","type":"education","lineage":["https://openalex.org/I170897317","https://openalex.org/I37461747","https://openalex.org/I4210159968"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Li","raw_affiliation_strings":["Wuhan University,School of Computer Science,Wuhan,China","School of Computer Science, Wuhan University, Wuhan, China","Suzhou Municipal Key Laboratory of Multimodal Intelligent Systems, Duke Kunshan University, Kunshan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,School of Computer Science,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"School of Computer Science, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"Suzhou Municipal Key Laboratory of Multimodal Intelligent Systems, Duke Kunshan University, Kunshan, China","institution_ids":["https://openalex.org/I4210159968"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5024016041"],"corresponding_institution_ids":["https://openalex.org/I37461747","https://openalex.org/I4210159968"],"apc_list":null,"apc_paid":null,"fwci":2.5671,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.90262693,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"12241","last_page":"12245"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8047000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8047000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.7893999814987183,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.7001000046730042,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7182639241218567},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.6827985048294067},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6640445590019226},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.546660840511322},{"id":"https://openalex.org/keywords/wake","display_name":"Wake","score":0.5191525816917419},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3536440134048462},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.29553601145744324},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.23684754967689514},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10381993651390076}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7182639241218567},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.6827985048294067},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6640445590019226},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.546660840511322},{"id":"https://openalex.org/C48939323","wikidata":"https://www.wikidata.org/wiki/Q294879","display_name":"Wake","level":2,"score":0.5191525816917419},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3536440134048462},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.29553601145744324},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.23684754967689514},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10381993651390076},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446042","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446042","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320336618","display_name":"Science and Technology Program of Suzhou","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2003200396","https://openalex.org/W2062335243","https://openalex.org/W2219249508","https://openalex.org/W2696967604","https://openalex.org/W2760103357","https://openalex.org/W2808631503","https://openalex.org/W2892300106","https://openalex.org/W2972552635","https://openalex.org/W3024085360","https://openalex.org/W3097777922","https://openalex.org/W3143923103","https://openalex.org/W3163237592","https://openalex.org/W3196117288","https://openalex.org/W3197478142","https://openalex.org/W3198453247","https://openalex.org/W3207834622","https://openalex.org/W4223655162","https://openalex.org/W4224917363","https://openalex.org/W4296068606","https://openalex.org/W4306867253","https://openalex.org/W4375868885","https://openalex.org/W4375869136","https://openalex.org/W4385245566","https://openalex.org/W4385823278"],"related_works":["https://openalex.org/W3135230428","https://openalex.org/W2296205523","https://openalex.org/W2904739811","https://openalex.org/W3087422378","https://openalex.org/W2152158029","https://openalex.org/W2012540220","https://openalex.org/W2131711534","https://openalex.org/W2559837139","https://openalex.org/W1151175420","https://openalex.org/W2166690696"],"abstract_inverted_index":{"Personal":[0],"voice":[1],"activity":[2],"detection":[3],"(PVAD)":[4],"is":[5,47,118],"gradually":[6],"used":[7],"in":[8,134],"speech":[9,23,56,100,121,141,145],"assistants.":[10],"Traditional":[11],"PVAD":[12,35,69],"schemes":[13],"extract":[14],"the":[15,31,34,40,43,58,93,98,102,128],"target":[16,103],"speaker\u2019s":[17,104],"embedding":[18,46],"from":[19,82],"existing":[20,139],"query":[21,140],"reference":[22,99],"through":[24],"a":[25,65,83],"pre-trained":[26,84],"speaker":[27,45,79,85],"verification":[28,86],"model.":[29,70],"Consequently,":[30],"performance":[32],"of":[33,42,97,130,137],"model":[36,111],"may":[37],"suffer":[38],"if":[39],"quality":[41],"extracted":[44,81],"poor,":[48],"such":[49],"as":[50,57,101,146],"when":[51],"only":[52],"utilizing":[53],"wake":[54,143],"word":[55,144],"reference.":[59,147],"In":[60,71,106],"this":[61,107],"work,":[62],"we":[63],"introduce":[64],"novel":[66],"and":[67],"efficient":[68],"contrast":[72],"to":[73],"conventional":[74],"approaches":[75],"that":[76],"rely":[77],"on":[78],"embeddings":[80],"model,":[87],"our":[88,109,131],"proposed":[89,110,132],"method":[90,133],"directly":[91],"uses":[92],"raw":[94],"frame-level":[95],"features":[96],"attributes.":[105],"way,":[108],"achieves":[112],"an":[113],"ultra-high":[114],"recall":[115],"rate,":[116],"which":[117],"vital":[119],"for":[120],"assistant":[122],"applications.":[123],"The":[124],"experimental":[125],"results":[126],"show":[127],"effectiveness":[129],"both":[135],"cases":[136],"using":[138],"or":[142]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
