{"id":"https://openalex.org/W2290391586","doi":"https://doi.org/10.1109/apsipa.2015.7415429","title":"Bottleneck features from SNR-adaptive denoising deep classifier for speaker identification","display_name":"Bottleneck features from SNR-adaptive denoising deep classifier for speaker identification","publication_year":2015,"publication_date":"2015-12-01","ids":{"openalex":"https://openalex.org/W2290391586","doi":"https://doi.org/10.1109/apsipa.2015.7415429","mag":"2290391586"},"language":"en","primary_location":{"id":"doi:10.1109/apsipa.2015.7415429","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipa.2015.7415429","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036359785","display_name":"Zhili Tan","orcid":"https://orcid.org/0000-0002-2445-5240"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Zhili Tan","raw_affiliation_strings":["Dept. of Electronic and Information Engineering, The Hong Kong Polytechnic University, Hong Kong SAR"],"affiliations":[{"raw_affiliation_string":"Dept. of Electronic and Information Engineering, The Hong Kong Polytechnic University, Hong Kong SAR","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068768998","display_name":"Man\u2010Wai Mak","orcid":"https://orcid.org/0000-0001-8854-3760"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Man-Wai Mak","raw_affiliation_strings":["Dept. of Electronic and Information Engineering, The Hong Kong Polytechnic University, Hong Kong SAR"],"affiliations":[{"raw_affiliation_string":"Dept. of Electronic and Information Engineering, The Hong Kong Polytechnic University, Hong Kong SAR","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5036359785"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":1.2943,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.86628298,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"11","issue":null,"first_page":"1035","last_page":"1040"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7542051076889038},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7294273376464844},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6682570576667786},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6440287828445435},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6145824193954468},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.52411949634552},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.5140362977981567},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.48052582144737244},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4742010235786438},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4477706253528595},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.41296982765197754},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.400272399187088}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7542051076889038},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7294273376464844},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6682570576667786},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6440287828445435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6145824193954468},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.52411949634552},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.5140362977981567},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.48052582144737244},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4742010235786438},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4477706253528595},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.41296982765197754},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.400272399187088},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/apsipa.2015.7415429","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipa.2015.7415429","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA)","raw_type":"proceedings-article"},{"id":"pmh:oai:ira.lib.polyu.edu.hk:10397/67643","is_oa":false,"landing_page_url":"http://hdl.handle.net/10397/67643","pdf_url":null,"source":{"id":"https://openalex.org/S4306400205","display_name":"PolyU Institutional Research Archive (Hong Kong Polytechnic University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I14243506","host_organization_name":"Hong Kong Polytechnic University","host_organization_lineage":["https://openalex.org/I14243506"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W128628490","https://openalex.org/W1498436455","https://openalex.org/W1994244061","https://openalex.org/W2039057510","https://openalex.org/W2043003570","https://openalex.org/W2062826588","https://openalex.org/W2075278892","https://openalex.org/W2100495367","https://openalex.org/W2102003408","https://openalex.org/W2116064496","https://openalex.org/W2121812409","https://openalex.org/W2136922672","https://openalex.org/W2145094598","https://openalex.org/W2145846419","https://openalex.org/W2146320039","https://openalex.org/W2147768505","https://openalex.org/W2150769028","https://openalex.org/W2163605009","https://openalex.org/W2295991281","https://openalex.org/W2395750323","https://openalex.org/W2397634864","https://openalex.org/W2400388989","https://openalex.org/W2406312423","https://openalex.org/W2407342067","https://openalex.org/W2997574889","https://openalex.org/W6605273041","https://openalex.org/W6629815555","https://openalex.org/W6681096077","https://openalex.org/W6684191040","https://openalex.org/W6696901769","https://openalex.org/W6712325649","https://openalex.org/W6713137374","https://openalex.org/W6713727690"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W4297051394","https://openalex.org/W3037110488"],"abstract_inverted_index":{"In":[0],"this":[1,112],"paper,":[2],"we":[3],"explore":[4],"the":[5,45,57,63,71,90,99,102,115,118,122,163,168,179,189],"potential":[6],"of":[7,32,39,60,93,101,121,131,178],"using":[8,98],"deep":[9,42,72],"learning":[10],"for":[11,15,150],"extracting":[12],"speaker-dependent":[13,67],"features":[14,137,165,181,191],"noise":[16],"robust":[17],"speaker":[18,151],"identification.":[19,152],"More":[20],"specifically,":[21],"an":[22,143],"SNR-adaptive":[23],"denoising":[24,41,113],"classifier":[25,123],"is":[26,48],"constructed":[27],"by":[28,78,82],"stacking":[29],"two":[30,180,190],"layers":[31],"restricted":[33],"Boltzmann":[34],"machines":[35],"(RBMs)":[36],"on":[37,156],"top":[38],"a":[40,51,94,128,147,157],"autoencoder,":[43],"where":[44],"top-RBM":[46,64],"layer":[47,54,65,120],"connected":[49],"to":[50,88,141,183],"soft-max":[52],"output":[53,116],"that":[55,162,176,188],"outputs":[56,66],"posterior":[58],"probabilities":[59],"speakers":[61],"and":[62,74,106,146,175],"bottleneck":[68,119,136,164],"features.":[69],"Both":[70],"autoencoder":[73,86],"RBMs":[75],"are":[76,138,192],"trained":[77],"contrastive":[79],"divergence,":[80],"followed":[81],"backpropagation":[83],"fine-tuning.":[84],"The":[85],"aims":[87],"reconstruct":[89],"clean":[91],"spectra":[92,100],"noisy":[95,103,158],"test":[96,104],"utterance":[97,105],"its":[107],"SNR":[108,173],"as":[109,127],"input.":[110],"With":[111],"capability,":[114],"from":[117],"can":[124],"be":[125],"considered":[126],"low-dimension":[129],"representation":[130],"denoised":[132],"utterances.":[133],"These":[134],"frame-based":[135],"than":[139],"used":[140],"train":[142],"iVector":[144],"extractor":[145],"PLDA":[148],"model":[149],"Experimental":[153],"results":[154],"based":[155],"YOHO":[159],"corpus":[160],"show":[161],"slightly":[166],"outperform":[167],"conventional":[169],"MFCC":[170],"under":[171],"low":[172],"conditions":[174],"fusion":[177],"lead":[182],"further":[184],"performance":[185],"gain,":[186],"suggesting":[187],"complementary":[193],"with":[194],"each":[195],"other.":[196]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":12},{"year":2018,"cited_by_count":3},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
