{"id":"https://openalex.org/W4406859261","doi":"https://doi.org/10.1109/apsipaasc63619.2025.10848915","title":"Personal Voice Activity Detection With Ultra-Short Reference Speech","display_name":"Personal Voice Activity Detection With Ultra-Short Reference Speech","publication_year":2024,"publication_date":"2024-12-03","ids":{"openalex":"https://openalex.org/W4406859261","doi":"https://doi.org/10.1109/apsipaasc63619.2025.10848915"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc63619.2025.10848915","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc63619.2025.10848915","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045048913","display_name":"Longting Xu","orcid":"https://orcid.org/0000-0002-2329-895X"},"institutions":[{"id":"https://openalex.org/I181326427","display_name":"Donghua University","ror":"https://ror.org/035psfh38","country_code":"CN","type":"education","lineage":["https://openalex.org/I181326427"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Longting Xu","raw_affiliation_strings":["College of Information Science and Technology, Donghua University,China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Technology, Donghua University,China","institution_ids":["https://openalex.org/I181326427"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100709814","display_name":"Mingjun Zhang","orcid":"https://orcid.org/0000-0001-5212-9322"},"institutions":[{"id":"https://openalex.org/I181326427","display_name":"Donghua University","ror":"https://ror.org/035psfh38","country_code":"CN","type":"education","lineage":["https://openalex.org/I181326427"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingjun Zhang","raw_affiliation_strings":["College of Information Science and Technology, Donghua University,China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Technology, Donghua University,China","institution_ids":["https://openalex.org/I181326427"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100710810","display_name":"Wenbin Zhang","orcid":"https://orcid.org/0000-0002-7681-7530"},"institutions":[{"id":"https://openalex.org/I4210093653","display_name":"Midea Group (China)","ror":"https://ror.org/006fm2278","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210093653"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbin Zhang","raw_affiliation_strings":["AI Research Center, Midea Group (Shanghai) Co.,Ltd.,Shanghai,China,201702"],"affiliations":[{"raw_affiliation_string":"AI Research Center, Midea Group (Shanghai) Co.,Ltd.,Shanghai,China,201702","institution_ids":["https://openalex.org/I4210093653"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100399640","display_name":"Tianyi Wang","orcid":"https://orcid.org/0000-0002-9419-5800"},"institutions":[{"id":"https://openalex.org/I4210093653","display_name":"Midea Group (China)","ror":"https://ror.org/006fm2278","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210093653"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianyi Wang","raw_affiliation_strings":["AI Research Center, Midea Group (Shanghai) Co.,Ltd.,Shanghai,China,201702"],"affiliations":[{"raw_affiliation_string":"AI Research Center, Midea Group (Shanghai) Co.,Ltd.,Shanghai,China,201702","institution_ids":["https://openalex.org/I4210093653"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102661064","display_name":"Jiawei Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210093653","display_name":"Midea Group (China)","ror":"https://ror.org/006fm2278","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210093653"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiawei Yin","raw_affiliation_strings":["AI Research Center, Midea Group (Shanghai) Co.,Ltd.,Shanghai,China,201702"],"affiliations":[{"raw_affiliation_string":"AI Research Center, Midea Group (Shanghai) Co.,Ltd.,Shanghai,China,201702","institution_ids":["https://openalex.org/I4210093653"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020803502","display_name":"Yu Gao","orcid":"https://orcid.org/0000-0002-2135-7872"},"institutions":[{"id":"https://openalex.org/I4210093653","display_name":"Midea Group (China)","ror":"https://ror.org/006fm2278","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210093653"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Gao","raw_affiliation_strings":["AI Research Center, Midea Group (Shanghai) Co.,Ltd.,Shanghai,China,201702"],"affiliations":[{"raw_affiliation_string":"AI Research Center, Midea Group (Shanghai) Co.,Ltd.,Shanghai,China,201702","institution_ids":["https://openalex.org/I4210093653"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5045048913"],"corresponding_institution_ids":["https://openalex.org/I181326427"],"apc_list":null,"apc_paid":null,"fwci":1.658,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.8725366,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9312000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9312000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.7489390969276428},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.689051628112793},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6600099802017212},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3585469424724579}],"concepts":[{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.7489390969276428},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.689051628112793},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6600099802017212},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3585469424724579}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc63619.2025.10848915","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc63619.2025.10848915","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1563939609","https://openalex.org/W2062335243","https://openalex.org/W2696967604","https://openalex.org/W2747874407","https://openalex.org/W2760103357","https://openalex.org/W2892300106","https://openalex.org/W3015199127","https://openalex.org/W3024085360","https://openalex.org/W3198453247","https://openalex.org/W4223655162","https://openalex.org/W4224916733","https://openalex.org/W4372260086","https://openalex.org/W4375868885","https://openalex.org/W4385245566","https://openalex.org/W4385823278","https://openalex.org/W4392904540","https://openalex.org/W4392908983","https://openalex.org/W6785635610"],"related_works":["https://openalex.org/W191108438","https://openalex.org/W3135230428","https://openalex.org/W2904739811","https://openalex.org/W249088392","https://openalex.org/W2152158029","https://openalex.org/W2012540220","https://openalex.org/W2559837139","https://openalex.org/W2131711534","https://openalex.org/W2559040841","https://openalex.org/W114661351"],"abstract_inverted_index":{"Personal":[0],"Voice":[1],"Activity":[2],"Detection":[3],"(PVAD)":[4],"is":[5],"widely":[6],"used":[7],"in":[8],"applications":[9],"such":[10],"as":[11,32,127,189,191,194],"voice":[12,18,39,157],"assistants.":[13],"To":[14,47],"accurately":[15],"detect":[16],"the":[17,21,28,36,53,94,98,134,149,195],"activity":[19,158],"of":[20,97,146],"target":[22,29,128,165],"speaker,":[23],"PVAD":[24,56,69,161],"typically":[25],"requires":[26],"pre-registering":[27],"speaker\u2019s":[30],"speech":[31,183],"a":[33,68,119,143],"reference.":[34],"However,":[35],"excessively":[37],"long":[38],"enrollment":[40],"process":[41],"tends":[42],"to":[43,88,112,162],"reduce":[44],"user":[45],"motivation.":[46],"address":[48],"this":[49],"problem,":[50],"we":[51,117],"explore":[52],"possibility":[54],"that":[55,71,81,176],"can":[57,105,152],"maintain":[58],"good":[59],"performance":[60,180],"even":[61,188],"with":[62,142],"short":[63,190],"reference":[64,74,100,171,196],"speech.":[65,101,172,197],"We":[66],"propose":[67],"network":[70,121],"supports":[72],"Ultra-Short":[73],"speech,":[75],"namely":[76],"US-PVAD.":[77],"Unlike":[78],"traditional":[79],"methods":[80],"rely":[82],"on":[83],"pre-trained":[84],"speaker":[85,90,129,139,166],"verification":[86],"models":[87],"extract":[89],"embeddings,":[91],"US-PVAD":[92,177],"allows":[93],"direct":[95],"input":[96],"original":[99],"Since":[102],"RNN":[103,125,150],"states":[104,126,151],"memorize":[106],"historical":[107],"information":[108],"and":[109,122],"use":[110,123],"it":[111],"guide":[113],"subsequent":[114],"time":[115],"steps,":[116],"employ":[118],"DPRNN-based":[120],"its":[124],"embedding.":[130],"This":[131],"approach":[132],"eliminates":[133],"need":[135],"for":[136],"an":[137],"external":[138],"embedding":[140],"extractor":[141],"large":[144],"number":[145],"parameters.":[147],"Additionally,":[148],"be":[153],"continuously":[154],"updated":[155],"during":[156],"detection,":[159],"allowing":[160],"obtain":[163],"sufficient":[164],"feature":[167],"attributes":[168],"from":[169],"ultra-short":[170],"Experimental":[173],"results":[174],"show":[175],"exhibits":[178],"better":[179],"when":[181],"using":[182],"under":[184],"2":[185],"seconds":[186,193],"or":[187],"0.2":[192]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
