{"id":"https://openalex.org/W7133331044","doi":"https://doi.org/10.1109/ijcb65343.2025.11410671","title":"Text-Independent Speaker Verification Employing A Novel Hybrid Neural Embedding Extractor","display_name":"Text-Independent Speaker Verification Employing A Novel Hybrid Neural Embedding Extractor","publication_year":2025,"publication_date":"2025-09-08","ids":{"openalex":"https://openalex.org/W7133331044","doi":"https://doi.org/10.1109/ijcb65343.2025.11410671"},"language":null,"primary_location":{"id":"doi:10.1109/ijcb65343.2025.11410671","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcb65343.2025.11410671","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Joint Conference on Biometrics (IJCB)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127969659","display_name":"Jahangir Alam","orcid":null},"institutions":[{"id":"https://openalex.org/I4210111842","display_name":"Computer Research Institute of Montr\u00e9al","ror":"https://ror.org/0279d5115","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210111842"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Jahangir Alam","raw_affiliation_strings":["Computer Research Institute of Montreal (CRIM),Montreal (Quebec),Canada"],"affiliations":[{"raw_affiliation_string":"Computer Research Institute of Montreal (CRIM),Montreal (Quebec),Canada","institution_ids":["https://openalex.org/I4210111842"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102581970","display_name":"Md Shahidul Alam","orcid":null},"institutions":[{"id":"https://openalex.org/I4210111842","display_name":"Computer Research Institute of Montr\u00e9al","ror":"https://ror.org/0279d5115","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210111842"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Md Shahidul Alam","raw_affiliation_strings":["Computer Research Institute of Montreal (CRIM),Montreal (Quebec),Canada"],"affiliations":[{"raw_affiliation_string":"Computer Research Institute of Montreal (CRIM),Montreal (Quebec),Canada","institution_ids":["https://openalex.org/I4210111842"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5127969659"],"corresponding_institution_ids":["https://openalex.org/I4210111842"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.88674141,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8853999972343445,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8853999972343445,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.013700000010430813,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.005400000140070915,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6466000080108643},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6025000214576721},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5766000151634216},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5595999956130981},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5493999719619751},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.517799973487854},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5078999996185303},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.46160000562667847},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.4578999876976013},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.4399999976158142}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7815999984741211},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6466000080108643},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6025000214576721},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5992000102996826},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5766000151634216},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5595999956130981},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5526000261306763},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5493999719619751},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.517799973487854},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5078999996185303},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.46160000562667847},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.4578999876976013},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.4399999976158142},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.42570000886917114},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.40610000491142273},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.38519999384880066},{"id":"https://openalex.org/C2779990667","wikidata":"https://www.wikidata.org/wiki/Q5953266","display_name":"Hybrid neural network","level":3,"score":0.3337000012397766},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.30979999899864197},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C175202392","wikidata":"https://www.wikidata.org/wiki/Q2434543","display_name":"Time delay neural network","level":3,"score":0.28700000047683716},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.28450000286102295},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.26899999380111694},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.26649999618530273},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.25929999351501465},{"id":"https://openalex.org/C34146451","wikidata":"https://www.wikidata.org/wiki/Q5048094","display_name":"Cascade","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2529999911785126},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.251800000667572},{"id":"https://openalex.org/C50897621","wikidata":"https://www.wikidata.org/wiki/Q2665508","display_name":"Hybrid system","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcb65343.2025.11410671","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcb65343.2025.11410671","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Joint Conference on Biometrics (IJCB)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1600744878","https://openalex.org/W2046056978","https://openalex.org/W2114925438","https://openalex.org/W2150769028","https://openalex.org/W2194775991","https://openalex.org/W2890964092","https://openalex.org/W2922509574","https://openalex.org/W2962788625","https://openalex.org/W2963576203","https://openalex.org/W2972392008","https://openalex.org/W2972961496","https://openalex.org/W2981087920","https://openalex.org/W3013020904","https://openalex.org/W3015611708","https://openalex.org/W3024318585","https://openalex.org/W3024869864","https://openalex.org/W3095851463","https://openalex.org/W3097244734","https://openalex.org/W3141035251","https://openalex.org/W3142516134","https://openalex.org/W3200111860","https://openalex.org/W3205878676","https://openalex.org/W3206886023","https://openalex.org/W4206908380","https://openalex.org/W4210486131","https://openalex.org/W4221154746","https://openalex.org/W4223531842","https://openalex.org/W4224795206","https://openalex.org/W4283022413","https://openalex.org/W4312095959","https://openalex.org/W4319862221","https://openalex.org/W4375869145","https://openalex.org/W4385822972","https://openalex.org/W4390168362","https://openalex.org/W4392411855","https://openalex.org/W4416250889"],"related_works":[],"abstract_inverted_index":{"Reliable":[0],"and":[1,35,58,89,113,150,172,178,191],"discriminative":[2,64],"speaker":[3,14,65,95],"embedding":[4,30,156],"extraction":[5],"lies":[6],"at":[7,97],"the":[8,53,98,115,119,138,155,161,176,197],"heart":[9],"of":[10,56,80,118,163],"modern":[11],"neural":[12,26,45],"automatic":[13],"verification":[15],"(ASV)":[16],"systems.":[17],"In":[18],"this":[19],"study,":[20],"we":[21,101],"introduce":[22],"a":[23,42,72,78],"novel":[24],"hybrid":[25,88,120],"architecture":[27],"that":[28,182],"enhances":[29],"quality":[31],"by":[32,126],"integrating":[33],"frequency-":[34],"channel-aware":[36],"Selective":[37],"Kernel":[38],"Attention":[39],"(SKA)":[40],"into":[41,71],"2D":[43],"convolutional":[44],"network":[46],"(2D-CNN)":[47],"feature":[48,68,166],"extractor.":[49],"This":[50],"design":[51],"strengthens":[52],"joint":[54],"modeling":[55],"frequency":[57],"channel":[59],"characteristics,":[60],"resulting":[61],"in":[62,154],"more":[63],"representations.":[66],"The":[67,132],"extractor":[69],"feeds":[70],"composite":[73],"frame-level":[74],"network,":[75],"structured":[76],"as":[77],"cascade":[79],"Time-Delay":[81],"Neural":[82],"Network":[83],"(TDNN)\u2013Long":[84],"Short-Term":[85],"Memory":[86],"(LSTM)":[87],"fully":[90],"TDNN":[91],"layers.":[92],"To":[93],"summarize":[94],"traits":[96],"utterance":[99],"level,":[100],"employ":[102],"Multi-Level":[103],"Attentive":[104],"Statistics":[105],"Pooling":[106],"(MLASP),":[107],"which":[108,145],"captures":[109],"diverse":[110],"statistical":[111],"cues":[112],"exploits":[114],"complementary":[116],"strengths":[117],"architecture.":[121],"MLASP":[122],"further":[123],"improves":[124],"robustness":[125],"recovering":[127],"subtle,":[128],"previously":[129],"underutilized":[130],"features.":[131],"full":[133],"system":[134],"is":[135],"trained":[136,195],"using":[137],"additive":[139],"angular":[140],"margin":[141],"softmax":[142],"(AAMSoftmax)":[143],"loss,":[144],"promotes":[146],"tighter":[147],"intra-speaker":[148],"clustering":[149],"broader":[151],"inter-speaker":[152],"separation":[153],"space.":[157],"We":[158],"also":[159],"explore":[160],"influence":[162],"different":[164],"CNN-driven":[165],"learning":[167],"modules":[168],"on":[169,175],"ASV":[170,193],"performance":[171],"resilience.":[173],"Evaluations":[174],"VoxCeleb":[177],"CNCeleb":[179],"benchmarks":[180],"confirm":[181],"our":[183],"proposed":[184],"method":[185],"consistently":[186],"surpasses":[187],"both":[188],"standard":[189],"baselines":[190],"state-of-the-art":[192],"models":[194],"under":[196],"same":[198],"conditions.":[199]},"counts_by_year":[],"updated_date":"2026-03-05T07:30:30.508283","created_date":"2026-03-04T00:00:00"}
