{"id":"https://openalex.org/W4205976327","doi":"https://doi.org/10.23919/eusipco54536.2021.9616337","title":"Exploiting Phase-based Features for Whisper vs. Speech Classification","display_name":"Exploiting Phase-based Features for Whisper vs. Speech Classification","publication_year":2021,"publication_date":"2021-08-23","ids":{"openalex":"https://openalex.org/W4205976327","doi":"https://doi.org/10.23919/eusipco54536.2021.9616337"},"language":"en","primary_location":{"id":"doi:10.23919/eusipco54536.2021.9616337","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco54536.2021.9616337","pdf_url":null,"source":{"id":"https://openalex.org/S4363607854","display_name":"2021 29th European Signal Processing Conference (EUSIPCO)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 29th European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091143209","display_name":"Nirmesh J. Shah","orcid":"https://orcid.org/0000-0002-7294-6757"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Nirmesh J. Shah","raw_affiliation_strings":["Speech Research Lab, DA-IICT,Gandhinagar,India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, DA-IICT,Gandhinagar,India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112980304","display_name":"M. Ali Basha Shaik","orcid":null},"institutions":[{"id":"https://openalex.org/I4210139030","display_name":"Samsung (India)","ror":"https://ror.org/04cpx2569","country_code":"IN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210139030"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"M. Ali Basha Shaik","raw_affiliation_strings":["Samsung R&#x0026;D Institute,Bangalore (SRI-B),India"],"affiliations":[{"raw_affiliation_string":"Samsung R&#x0026;D Institute,Bangalore (SRI-B),India","institution_ids":["https://openalex.org/I4210139030"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068152390","display_name":"P. Periyasamy","orcid":"https://orcid.org/0000-0003-2887-2743"},"institutions":[{"id":"https://openalex.org/I4210139030","display_name":"Samsung (India)","ror":"https://ror.org/04cpx2569","country_code":"IN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210139030"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"P. Periyasamy","raw_affiliation_strings":["Samsung R&#x0026;D Institute,Bangalore (SRI-B),India"],"affiliations":[{"raw_affiliation_string":"Samsung R&#x0026;D Institute,Bangalore (SRI-B),India","institution_ids":["https://openalex.org/I4210139030"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043002276","display_name":"Hemant A. Patil","orcid":"https://orcid.org/0000-0002-4068-2005"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Hemant A. Patil","raw_affiliation_strings":["Speech Research Lab, DA-IICT,Gandhinagar,India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, DA-IICT,Gandhinagar,India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089254704","display_name":"Vikram Vij","orcid":null},"institutions":[{"id":"https://openalex.org/I4210139030","display_name":"Samsung (India)","ror":"https://ror.org/04cpx2569","country_code":"IN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210139030"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vikram Vij","raw_affiliation_strings":["Samsung R&#x0026;D Institute,Bangalore (SRI-B),India"],"affiliations":[{"raw_affiliation_string":"Samsung R&#x0026;D Institute,Bangalore (SRI-B),India","institution_ids":["https://openalex.org/I4210139030"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5091143209"],"corresponding_institution_ids":["https://openalex.org/I98389781"],"apc_list":null,"apc_paid":null,"fwci":0.6881,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.7421088,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"21","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7975680232048035},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7785750031471252},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7049887180328369},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5338280200958252},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4851849675178528},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4738262891769409},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.47058263421058655},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4120749235153198},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39204084873199463},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36699020862579346}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7975680232048035},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7785750031471252},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7049887180328369},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5338280200958252},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4851849675178528},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4738262891769409},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.47058263421058655},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4120749235153198},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39204084873199463},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36699020862579346},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/eusipco54536.2021.9616337","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco54536.2021.9616337","pdf_url":null,"source":{"id":"https://openalex.org/S4363607854","display_name":"2021 29th European Signal Processing Conference (EUSIPCO)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 29th European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W37326695","https://openalex.org/W79017063","https://openalex.org/W96541173","https://openalex.org/W177407077","https://openalex.org/W1557133539","https://openalex.org/W1983989471","https://openalex.org/W2006033171","https://openalex.org/W2065147844","https://openalex.org/W2103075368","https://openalex.org/W2108148744","https://openalex.org/W2151046079","https://openalex.org/W2165112771","https://openalex.org/W2219249508","https://openalex.org/W2345844407","https://openalex.org/W2405506115","https://openalex.org/W2476548250","https://openalex.org/W2477452846","https://openalex.org/W2531409750","https://openalex.org/W2547916733","https://openalex.org/W2583038182","https://openalex.org/W2639888510","https://openalex.org/W2768731046","https://openalex.org/W2889933531","https://openalex.org/W2935803161","https://openalex.org/W2937579788","https://openalex.org/W2938526130","https://openalex.org/W2939099741","https://openalex.org/W2963073614","https://openalex.org/W2972924908","https://openalex.org/W2996797022","https://openalex.org/W3105124182","https://openalex.org/W3137116880","https://openalex.org/W4245838404","https://openalex.org/W4251945510","https://openalex.org/W6601540304","https://openalex.org/W6688816777"],"related_works":["https://openalex.org/W2059520982","https://openalex.org/W2106462793","https://openalex.org/W2098265087","https://openalex.org/W2012540220","https://openalex.org/W1628467322","https://openalex.org/W1918035079","https://openalex.org/W2144256180","https://openalex.org/W2536442632","https://openalex.org/W2149702992","https://openalex.org/W2787297041"],"abstract_inverted_index":{"Performance":[0],"of":[1,56,65,73,87,93,114,145,160,188],"Voice":[2],"Assistant":[3],"(VA)":[4],"deteriorates":[5],"notably":[6],"when":[7],"tested":[8],"on":[9,181],"the":[10,20,26,45,52,83,88,94,127,131,143,149,168,182,189],"whispered":[11,89,175],"speech.":[12,90],"Hence,":[13],"separate":[14],"systems":[15],"are":[16,164],"being":[17],"developed":[18],"for":[19,82,152,185],"whisper.":[21],"To":[22],"that":[23],"effect,":[24],"detecting":[25],"incoming":[27],"signal":[28],"as":[29],"to":[30,60,70,130,173],"whether":[31],"it":[32],"is":[33,48,98,140],"a":[34,37,41],"whisper":[35],"or":[36],"speech":[38,176,190],"(especially":[39],"with":[40,126],"low":[42],"latency)":[43],"in":[44,123,142],"noisy":[46],"environments":[47],"more":[49],"desirable":[50],"from":[51],"model":[53],"switching":[54],"point":[55],"view.":[57],"We":[58,116],"propose":[59],"exploit":[61],"high":[62],"resolution":[63],"property":[64],"group":[66,161],"delay":[67,162],"spectrum":[68],"(GDSPEC)":[69],"capture":[71],"characteristic":[72],"excitation":[74],"source":[75],"(voiced":[76],"vs.":[77],"unvoiced)":[78],"and":[79,111,119,134],"formant":[80],"shift":[81],"early":[84],"robust":[85],"detection":[86],"The":[91],"effectiveness":[92],"proposed":[95],"feature":[96],"set":[97],"investigated":[99],"across":[100],"different":[101,153,186],"deep":[102],"learning-based":[103],"classifiers":[104],"using":[105],"three":[106],"databases,":[107],"namely,":[108],"wTIMIT,":[109],"CHAINS,":[110],"in-house":[112],"database":[113],"Samsung.":[115],"obtain":[117],"3.4%,":[118],"5.05%":[120],"relative":[121],"improvement":[122],"classification":[124],"accuracy":[125],"SEPC+GDSPEC":[128],"compared":[129],"individual":[132],"SPEC,":[133],"GDSPEC":[135],"features,":[136],"respectively.":[137],"Furthermore,":[138],"robustness":[139,159],"shown":[141],"presence":[144],"state-of-the-art":[146],"noises":[147],"(from":[148],"MUSAN":[150],"database)":[151],"SNR":[154],"levels.":[155],"Mathematical":[156],"intuitions":[157],"behind":[158],"functions":[163],"also":[165],"presented.":[166],"Finally,":[167],"frame-level":[169],"decision":[170],"was":[171],"combined":[172],"predict":[174],"at":[177],"an":[178],"utterance-level":[179],"based":[180],"majority":[183],"rule":[184],"lengths":[187],"segments.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
