{"id":"https://openalex.org/W4312466906","doi":"https://doi.org/10.1109/icccnt54827.2022.9984425","title":"A Method for Voice Activity Detection using K-Means Clustering","display_name":"A Method for Voice Activity Detection using K-Means Clustering","publication_year":2022,"publication_date":"2022-10-03","ids":{"openalex":"https://openalex.org/W4312466906","doi":"https://doi.org/10.1109/icccnt54827.2022.9984425"},"language":"en","primary_location":{"id":"doi:10.1109/icccnt54827.2022.9984425","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icccnt54827.2022.9984425","pdf_url":null,"source":{"id":"https://openalex.org/S4363607876","display_name":"2022 13th International Conference on Computing Communication and Networking Technologies (ICCCNT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 13th International Conference on Computing Communication and Networking Technologies (ICCCNT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008235814","display_name":"Atul Rohit Agarwal","orcid":null},"institutions":[{"id":"https://openalex.org/I876193797","display_name":"Vellore Institute of Technology University","ror":"https://ror.org/00qzypv28","country_code":"IN","type":"education","lineage":["https://openalex.org/I876193797"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Atul Rohit Agarwal","raw_affiliation_strings":["Vellore Institute of Technology,School of Computer Science Engineering,Vellore,India","School of Computer Science Engineering, Vellore Institute of Technology, Vellore, India"],"affiliations":[{"raw_affiliation_string":"Vellore Institute of Technology,School of Computer Science Engineering,Vellore,India","institution_ids":["https://openalex.org/I876193797"]},{"raw_affiliation_string":"School of Computer Science Engineering, Vellore Institute of Technology, Vellore, India","institution_ids":["https://openalex.org/I876193797"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082694737","display_name":"Sourabh Tiwari","orcid":"https://orcid.org/0000-0001-7641-2149"},"institutions":[{"id":"https://openalex.org/I4210139030","display_name":"Samsung (India)","ror":"https://ror.org/04cpx2569","country_code":"IN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210139030"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sourabh Tiwari","raw_affiliation_strings":["Samsung R&amp;D Institute,Voice Enabling Group,Bangalore,India"],"affiliations":[{"raw_affiliation_string":"Samsung R&amp;D Institute,Voice Enabling Group,Bangalore,India","institution_ids":["https://openalex.org/I4210139030"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034940196","display_name":"Vinay Vasanth Patage","orcid":"https://orcid.org/0009-0009-7633-6989"},"institutions":[{"id":"https://openalex.org/I4210139030","display_name":"Samsung (India)","ror":"https://ror.org/04cpx2569","country_code":"IN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210139030"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vinay Vasanth Patage","raw_affiliation_strings":["Samsung R&amp;D Institute,Voice Enabling Group,Bangalore,India"],"affiliations":[{"raw_affiliation_string":"Samsung R&amp;D Institute,Voice Enabling Group,Bangalore,India","institution_ids":["https://openalex.org/I4210139030"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101458501","display_name":"S. Sankar Ganesh","orcid":null},"institutions":[{"id":"https://openalex.org/I876193797","display_name":"Vellore Institute of Technology University","ror":"https://ror.org/00qzypv28","country_code":"IN","type":"education","lineage":["https://openalex.org/I876193797"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sankar Ganesh S","raw_affiliation_strings":["Vellore Institute of Technology,Department of Communication Engineering,Vellore,India","Department of Communication Engineering, Vellore Institute of Technology, Vellore, India"],"affiliations":[{"raw_affiliation_string":"Vellore Institute of Technology,Department of Communication Engineering,Vellore,India","institution_ids":["https://openalex.org/I876193797"]},{"raw_affiliation_string":"Department of Communication Engineering, Vellore Institute of Technology, Vellore, India","institution_ids":["https://openalex.org/I876193797"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077962462","display_name":"M. Sudhakar","orcid":null},"institutions":[{"id":"https://openalex.org/I876193797","display_name":"Vellore Institute of Technology University","ror":"https://ror.org/00qzypv28","country_code":"IN","type":"education","lineage":["https://openalex.org/I876193797"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sudhakar M S","raw_affiliation_strings":["Vellore Institute of Technology,Department of Communication Engineering,Vellore,India","Department of Communication Engineering, Vellore Institute of Technology, Vellore, India"],"affiliations":[{"raw_affiliation_string":"Vellore Institute of Technology,Department of Communication Engineering,Vellore,India","institution_ids":["https://openalex.org/I876193797"]},{"raw_affiliation_string":"Department of Communication Engineering, Vellore Institute of Technology, Vellore, India","institution_ids":["https://openalex.org/I876193797"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5008235814"],"corresponding_institution_ids":["https://openalex.org/I876193797"],"apc_list":null,"apc_paid":null,"fwci":0.6145,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.67077409,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.9156061410903931},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8199366927146912},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.770228385925293},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6892476081848145},{"id":"https://openalex.org/keywords/centroid","display_name":"Centroid","score":0.6747130751609802},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5657303333282471},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.561620831489563},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5088005661964417},{"id":"https://openalex.org/keywords/human-voice","display_name":"Human voice","score":0.4657863676548004},{"id":"https://openalex.org/keywords/activity-recognition","display_name":"Activity recognition","score":0.44596683979034424},{"id":"https://openalex.org/keywords/spectral-clustering","display_name":"Spectral clustering","score":0.4431101679801941},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4172343909740448},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39389562606811523},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3781437277793884},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.2821894586086273},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.2576681971549988}],"concepts":[{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.9156061410903931},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8199366927146912},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.770228385925293},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6892476081848145},{"id":"https://openalex.org/C146599234","wikidata":"https://www.wikidata.org/wiki/Q511093","display_name":"Centroid","level":2,"score":0.6747130751609802},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5657303333282471},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.561620831489563},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5088005661964417},{"id":"https://openalex.org/C20766975","wikidata":"https://www.wikidata.org/wiki/Q7390","display_name":"Human voice","level":2,"score":0.4657863676548004},{"id":"https://openalex.org/C121687571","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Activity recognition","level":2,"score":0.44596683979034424},{"id":"https://openalex.org/C105611402","wikidata":"https://www.wikidata.org/wiki/Q2976589","display_name":"Spectral clustering","level":3,"score":0.4431101679801941},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4172343909740448},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39389562606811523},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3781437277793884},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2821894586086273},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2576681971549988},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icccnt54827.2022.9984425","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icccnt54827.2022.9984425","pdf_url":null,"source":{"id":"https://openalex.org/S4363607876","display_name":"2022 13th International Conference on Computing Communication and Networking Technologies (ICCCNT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 13th International Conference on Computing Communication and Networking Technologies (ICCCNT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4099999964237213,"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W86289693","https://openalex.org/W193961407","https://openalex.org/W1976001286","https://openalex.org/W2051170812","https://openalex.org/W2059322891","https://openalex.org/W2158146178","https://openalex.org/W2191779130","https://openalex.org/W2945013390","https://openalex.org/W3015783745","https://openalex.org/W3038871978","https://openalex.org/W3094211490","https://openalex.org/W3126424148","https://openalex.org/W3129056539","https://openalex.org/W3208743843","https://openalex.org/W3209552243","https://openalex.org/W3212264559","https://openalex.org/W3217670778","https://openalex.org/W4224221649","https://openalex.org/W4287848452","https://openalex.org/W6607870299"],"related_works":["https://openalex.org/W2953493266","https://openalex.org/W1517805583","https://openalex.org/W2972243767","https://openalex.org/W3119288895","https://openalex.org/W4312466906","https://openalex.org/W2126850070","https://openalex.org/W2343205865","https://openalex.org/W2525874696","https://openalex.org/W3095390905","https://openalex.org/W109284678"],"abstract_inverted_index":{"Human-Machine":[0],"interaction":[1],"through":[2],"voice":[3,28,35,58,147],"modality":[4],"in":[5,10,61],"recent":[6],"time":[7],"has":[8],"resulted":[9],"both":[11],"research":[12],"and":[13,56,102,132,137,142],"business":[14,18],"use":[15,76],"cases.":[16],"Major":[17],"organizations":[19],"are":[20,120],"making":[21],"the":[22,45,75,113,146],"shift":[23],"towards":[24,48],"developing":[25,49],"their":[26],"state-of-the-art":[27],"assistants,":[29],"whose":[30],"accuracy":[31],"of":[32,44,77,112],"understanding":[33,55],"human":[34],"command,":[36],"is":[37,54],"not":[38],"affected":[39],"to":[40,82,145],"surrounding":[41],"noise.":[42],"One":[43],"first":[46],"steps":[47],"such":[50],"an":[51,62],"interactive":[52],"model":[53],"segmenting":[57],"activity":[59,148],"regions":[60],"audio":[63,91,114],"clip":[64],"from":[65],"all":[66],"other":[67],"noise":[68],"present.":[69],"In":[70],"this":[71,84],"paper":[72],"we":[73],"propose":[74],"a":[78,140],"weighted":[79],"clustering":[80],"approach":[81,136],"solve":[83],"problem.":[85,150],"The":[86],"proposed":[87],"solution":[88,144],"utilizes":[89],"four":[90],"features,":[92],"Mel":[93],"frequency":[94],"cepstral":[95],"coefficients":[96],"(MFCC),":[97],"Spectral":[98,100],"Roll-Off,":[99],"Centroid":[101],"Zero":[103],"Crossing":[104],"Rate":[105],"for":[106],"every":[107],"0.125":[108],"second":[109],"sub":[110,118],"segment":[111],"clip.":[115],"Next,":[116],"these":[117],"segments":[119],"clustered":[121],"using":[122],"K":[123],"Means":[124],"Clustering":[125],"algorithm":[126],"into":[127],"2":[128],"clusters:":[129],"Voice":[130],"Activity":[131],"Noise":[133],"Activity.":[134],"This":[135],"solution,":[138],"provides":[139],"simple":[141],"lightweight":[143],"detection":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
