{"id":"https://openalex.org/W4402198350","doi":"https://doi.org/10.31577/cai_2024_4_819","title":"Multi-Stream Convolutional Neural Network with Frequency Selection for Robust Speaker Verification","display_name":"Multi-Stream Convolutional Neural Network with Frequency Selection for Robust Speaker Verification","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4402198350","doi":"https://doi.org/10.31577/cai_2024_4_819"},"language":"en","primary_location":{"id":"doi:10.31577/cai_2024_4_819","is_oa":true,"landing_page_url":"https://doi.org/10.31577/cai_2024_4_819","pdf_url":"https://www.cai.sk/ojs/index.php/cai/article/download/2024_4_819/1304","source":{"id":"https://openalex.org/S4210200093","display_name":"Computing and Informatics","issn_l":"1335-9150","issn":["1335-9150","2585-8807"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computing and Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://www.cai.sk/ojs/index.php/cai/article/download/2024_4_819/1304","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100647649","display_name":"Wei Yao","orcid":"https://orcid.org/0000-0001-7704-0615"},"institutions":[{"id":"https://openalex.org/I4210160030","display_name":"Zhejiang University of Water Resource and Electric Power","ror":"https://ror.org/04dg5b632","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210160030"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei Yao","raw_affiliation_strings":["Key Laboratory of Technology in Rural Water Management of Zhejiang Province, College of Electric Engineering, Zhejiang University of Water Resources and Electric Power, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Technology in Rural Water Management of Zhejiang Province, College of Electric Engineering, Zhejiang University of Water Resources and Electric Power, Hangzhou, China","institution_ids":["https://openalex.org/I4210160030"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046864714","display_name":"Chen Shen","orcid":"https://orcid.org/0000-0003-3535-8494"},"institutions":[{"id":"https://openalex.org/I4210106357","display_name":"Zhejiang Energy Research Institute","ror":"https://ror.org/01fqrb109","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210106357"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shen Chen","raw_affiliation_strings":["Wanbang Digital Energy Co., Ltd. (China), Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Wanbang Digital Energy Co., Ltd. (China), Hangzhou, China","institution_ids":["https://openalex.org/I4210106357"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014057812","display_name":"Jiamin Cui","orcid":null},"institutions":[{"id":"https://openalex.org/I4210160030","display_name":"Zhejiang University of Water Resource and Electric Power","ror":"https://ror.org/04dg5b632","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210160030"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiamin Cui","raw_affiliation_strings":["Key Laboratory of Technology in Rural Water Management of Zhejiang Province, College of Electric Engineering, Zhejiang University of Water Resources and Electric Power, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Technology in Rural Water Management of Zhejiang Province, College of Electric Engineering, Zhejiang University of Water Resources and Electric Power, Hangzhou, China","institution_ids":["https://openalex.org/I4210160030"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067495689","display_name":"Yaolin Lou","orcid":"https://orcid.org/0000-0002-8476-466X"},"institutions":[{"id":"https://openalex.org/I4210160030","display_name":"Zhejiang University of Water Resource and Electric Power","ror":"https://ror.org/04dg5b632","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210160030"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaolin Lou","raw_affiliation_strings":["Key Laboratory of Technology in Rural Water Management of Zhejiang Province, College of Electric Engineering, Zhejiang University of Water Resources and Electric Power, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Technology in Rural Water Management of Zhejiang Province, College of Electric Engineering, Zhejiang University of Water Resources and Electric Power, Hangzhou, China","institution_ids":["https://openalex.org/I4210160030"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100647649"],"corresponding_institution_ids":["https://openalex.org/I4210160030"],"apc_list":null,"apc_paid":null,"fwci":0.6891,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.75326352,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"43","issue":"4","first_page":"819","last_page":"848"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7500802278518677},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.732063889503479},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.6912201642990112},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6002514958381653},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5859537720680237},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.349473774433136},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3443988561630249},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3299858570098877}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7500802278518677},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.732063889503479},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.6912201642990112},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6002514958381653},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5859537720680237},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.349473774433136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3443988561630249},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3299858570098877}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.31577/cai_2024_4_819","is_oa":true,"landing_page_url":"https://doi.org/10.31577/cai_2024_4_819","pdf_url":"https://www.cai.sk/ojs/index.php/cai/article/download/2024_4_819/1304","source":{"id":"https://openalex.org/S4210200093","display_name":"Computing and Informatics","issn_l":"1335-9150","issn":["1335-9150","2585-8807"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computing and Informatics","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.31577/cai_2024_4_819","is_oa":true,"landing_page_url":"https://doi.org/10.31577/cai_2024_4_819","pdf_url":"https://www.cai.sk/ojs/index.php/cai/article/download/2024_4_819/1304","source":{"id":"https://openalex.org/S4210200093","display_name":"Computing and Informatics","issn_l":"1335-9150","issn":["1335-9150","2585-8807"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computing and Informatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1169237214","display_name":null,"funder_award_id":"LGG21E060001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8270875460","display_name":null,"funder_award_id":"LGG21E060001","funder_id":"https://openalex.org/F4320338464","funder_display_name":"Natural Science Foundation of Zhejiang Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320338464","display_name":"Natural Science Foundation of Zhejiang Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4402198350.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W66821593","https://openalex.org/W4297807400","https://openalex.org/W2249138175","https://openalex.org/W1521299571","https://openalex.org/W4313854686","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2140022733"],"abstract_inverted_index":{"Speaker":[0],"verification":[1,85],"aims":[2],"to":[3,10,46,52,98,120,141,207],"verify":[4],"whether":[5],"an":[6],"input":[7,174],"speech":[8],"corresponds":[9],"the":[11,27,100,106,123,131,219],"claimed":[12],"speaker,":[13],"and":[14,68,130,189,218,242],"conventionally,":[15],"this":[16,36,81,163],"kind":[17],"of":[18,57,74,102,108,125,134,198,233,245],"system":[19],"is":[20,62,119,179],"deployed":[21],"based":[22],"on":[23,215],"single-stream":[24,150,228],"scenario,":[25],"wherein":[26,152],"feature":[28,113,132],"extractor":[29,133],"operates":[30],"in":[31,162,171,236,248],"full":[32,58],"frequency":[33,54,59,65,116,126,145,183,187],"range.":[34],"In":[35],"paper,":[37],"we":[38,111],"hypothesize":[39],"that":[40,223],"machine":[41],"can":[42,137],"learn":[43],"enough":[44],"knowledge":[45],"do":[47],"classification":[48],"task":[49],"when":[50],"listening":[51],"partial":[53],"range":[55],"instead":[56],"range,":[60,188],"which":[61,118,139],"so":[63],"called":[64],"selection":[66],"technique,":[67],"further":[69],"propose":[70],"a":[71,182,204],"novel":[72],"framework":[73,89],"multi-stream":[75,224],"Convolutional":[76],"Neural":[77],"Network":[78],"(CNN)":[79],"with":[80,115,230],"technique":[82],"for":[83,159,176],"speaker":[84],"tasks.":[86],"The":[87,173,194],"proposed":[88],"accommodates":[90],"diverse":[91],"temporal":[92,109,196],"embeddings":[93,197],"generated":[94],"from":[95,148],"multiple":[96,167],"streams":[97,168],"enhance":[99],"robustness":[101],"acoustic":[103],"modeling.":[104],"For":[105],"diversity":[107],"embeddings,":[110],"consider":[112],"augmentation":[114],"selection,":[117],"manually":[121],"segment":[122],"full-band":[124],"into":[127,203],"several":[128],"sub-bands,":[129],"each":[135,153,177,199],"stream":[136,178,200],"select":[138],"sub-bands":[140],"use":[142],"as":[143],"target":[144],"domain.":[146],"Different":[147],"conventional":[149],"solution":[151],"utterance":[154,175],"would":[155],"only":[156],"be":[157],"processed":[158],"one":[160],"time,":[161],"framework,":[164],"there":[165],"are":[166],"processing":[169],"it":[170],"parallel.":[172],"pre-processed":[180],"by":[181,191],"selector":[184],"within":[185],"specified":[186],"post-processed":[190],"mean":[192],"normalization.":[193],"normalized":[195],"will":[201],"flow":[202],"pooling":[205],"layer":[206],"generate":[208],"fused":[209],"embeddings.":[210],"We":[211],"conduct":[212],"extensive":[213],"experiments":[214],"VoxCeleb":[216],"dataset,":[217],"experimental":[220],"results":[221],"demonstrate":[222],"CNN":[225],"significantly":[226],"outperforms":[227],"baseline":[229],"20.53":[231],"%":[232,244],"relative":[234,246],"improvement":[235,247],"minimum":[237],"Decision":[238],"Cost":[239],"Function":[240],"(minDCF)":[241],"15.28":[243],"Equal":[249],"Error":[250],"Rate":[251],"(EER).":[252]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
