{"id":"https://openalex.org/W4402592714","doi":"https://doi.org/10.1109/taslp.2024.3463491","title":"Efficient Lightweight Speaker Verification With Broadcasting CNN-Transformer and Knowledge Distillation Training of Self-Attention Maps","display_name":"Efficient Lightweight Speaker Verification With Broadcasting CNN-Transformer and Knowledge Distillation Training of Self-Attention Maps","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4402592714","doi":"https://doi.org/10.1109/taslp.2024.3463491"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3463491","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3463491","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070614213","display_name":"Jeong\u2010Hwan Choi","orcid":"https://orcid.org/0000-0002-7741-5365"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jeong-Hwan Choi","raw_affiliation_strings":["School of Electronics, Hanyang University, Seoul, South Korea","School of Electronics, Hanyang University, Seoul, Korea"],"raw_orcid":"https://orcid.org/0000-0002-7741-5365","affiliations":[{"raw_affiliation_string":"School of Electronics, Hanyang University, Seoul, South Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":"School of Electronics, Hanyang University, Seoul, Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051103419","display_name":"Joon-Young Yang","orcid":"https://orcid.org/0000-0003-0096-4371"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Joon-Young Yang","raw_affiliation_strings":["School of Electronics, Hanyang University, Seoul, South Korea","School of Electronics, Hanyang University, Seoul, Korea"],"raw_orcid":"https://orcid.org/0000-0003-0096-4371","affiliations":[{"raw_affiliation_string":"School of Electronics, Hanyang University, Seoul, South Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":"School of Electronics, Hanyang University, Seoul, Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002418613","display_name":"Joon\u2010Hyuk Chang","orcid":"https://orcid.org/0000-0003-2610-2323"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Joon-Hyuk Chang","raw_affiliation_strings":["School of Electronics, Hanyang University, Seoul, South Korea","School of Electronics, Hanyang University, Seoul, Korea"],"raw_orcid":"https://orcid.org/0000-0003-2610-2323","affiliations":[{"raw_affiliation_string":"School of Electronics, Hanyang University, Seoul, South Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":"School of Electronics, Hanyang University, Seoul, Korea","institution_ids":["https://openalex.org/I4575257"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4575257"],"apc_list":null,"apc_paid":null,"fwci":1.7915,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.87389891,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"32","issue":null,"first_page":"4580","last_page":"4595"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9747999906539917,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9747999906539917,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9455000162124634,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7250854969024658},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6655400395393372},{"id":"https://openalex.org/keywords/broadcasting","display_name":"Broadcasting (networking)","score":0.47761720418930054},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.45806366205215454},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4361022710800171},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.4240271747112274},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.41472315788269043},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.37874943017959595},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3216897249221802},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1361221969127655},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11525040864944458},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.07689321041107178}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7250854969024658},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6655400395393372},{"id":"https://openalex.org/C110157686","wikidata":"https://www.wikidata.org/wiki/Q922122","display_name":"Broadcasting (networking)","level":2,"score":0.47761720418930054},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.45806366205215454},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4361022710800171},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.4240271747112274},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.41472315788269043},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37874943017959595},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3216897249221802},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1361221969127655},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11525040864944458},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.07689321041107178},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3463491","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3463491","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W1665214252","https://openalex.org/W2046056978","https://openalex.org/W2070176749","https://openalex.org/W2157161740","https://openalex.org/W2194775991","https://openalex.org/W2402146185","https://openalex.org/W2516764878","https://openalex.org/W2531409750","https://openalex.org/W2549139847","https://openalex.org/W2616247523","https://openalex.org/W2696967604","https://openalex.org/W2726515241","https://openalex.org/W2747165665","https://openalex.org/W2808631503","https://openalex.org/W2888807255","https://openalex.org/W2890964092","https://openalex.org/W2922509574","https://openalex.org/W2928165649","https://openalex.org/W2936733796","https://openalex.org/W2963077989","https://openalex.org/W2963125010","https://openalex.org/W2963163009","https://openalex.org/W2963371159","https://openalex.org/W2963503540","https://openalex.org/W2969985801","https://openalex.org/W3010893827","https://openalex.org/W3013020904","https://openalex.org/W3024869864","https://openalex.org/W3034457371","https://openalex.org/W3095851463","https://openalex.org/W3096918678","https://openalex.org/W3097375352","https://openalex.org/W3097731653","https://openalex.org/W3105966348","https://openalex.org/W3137249133","https://openalex.org/W3142516134","https://openalex.org/W3161447515","https://openalex.org/W3163082428","https://openalex.org/W3174329270","https://openalex.org/W3196496149","https://openalex.org/W3197343310","https://openalex.org/W3198742213","https://openalex.org/W3205519288","https://openalex.org/W3206114047","https://openalex.org/W3215290109","https://openalex.org/W4205234379","https://openalex.org/W4214493665","https://openalex.org/W4214717370","https://openalex.org/W4221154745","https://openalex.org/W4224916451","https://openalex.org/W4296069297","https://openalex.org/W4297841779","https://openalex.org/W4312298622","https://openalex.org/W4312527085","https://openalex.org/W4312950730","https://openalex.org/W4321608821","https://openalex.org/W4385245566","https://openalex.org/W4385822752","https://openalex.org/W4391423796","https://openalex.org/W4396982346","https://openalex.org/W6631190155","https://openalex.org/W6637551013","https://openalex.org/W6638523607","https://openalex.org/W6688816777","https://openalex.org/W6729956949","https://openalex.org/W6730179637","https://openalex.org/W6737575990","https://openalex.org/W6739651123","https://openalex.org/W6755977528","https://openalex.org/W6773815586","https://openalex.org/W6780226713","https://openalex.org/W6784333009","https://openalex.org/W6784686509","https://openalex.org/W6803691255"],"related_works":["https://openalex.org/W66821593","https://openalex.org/W1521299571","https://openalex.org/W4235705411","https://openalex.org/W204267554","https://openalex.org/W2134501921","https://openalex.org/W4252590334","https://openalex.org/W2543777506","https://openalex.org/W3096066489","https://openalex.org/W3119184205","https://openalex.org/W2150532155"],"abstract_inverted_index":{"Developing":[0],"a":[1,33,49,84,181,194],"lightweight":[2,34],"speaker":[3,15],"embedding":[4],"extractor":[5],"(SEE)":[6],"is":[7],"crucial":[8],"for":[9,187],"the":[10,43,99,105,107,113,117,121,138,142,144,149,164,170,174,177,188,198,203,211,219,235,267,277,303],"practical":[11],"implementation":[12],"of":[13,52,104,141,158,173,180,193,202,213,276,281],"automatic":[14],"verification":[16],"(ASV)":[17],"systems.":[18],"To":[19],"this":[20,69],"end,":[21],"we":[22,71],"recently":[23],"introduced":[24],"<italic":[25],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[26],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">broadcasting":[27],"convolutional":[28],"neural":[29],"networks":[30],"(CNNs)-meet-vision-Transformers</i>":[31],"(BC-CMT),":[32],"SEE":[35,59],"that":[36,130,234],"utilizes":[37],"broadcasted":[38],"residual":[39],"learning":[40],"(BRL)":[41],"within":[42],"hybrid":[44],"CNN-Transformer":[45],"architecture":[46,82],"to":[47,97,126,136,154,168,190,266,283,289,294],"maintain":[48],"small":[50],"number":[51,157,212],"model":[53,81,238,241],"parameters.":[54],"We":[55],"proposed":[56,75,220,236,278,299],"three":[57,61],"BC-CMT-based":[58],"with":[60,163,227,307],"different":[62,228],"sizes:":[63],"BC-CMT-Tiny,":[64,176],"-Small,":[65],"and":[66,83,102,160,245,255,272,292],"-Base.":[67],"In":[68],"study,":[70],"extend":[72],"our":[73],"previously":[74],"BC-CMT":[76,205],"by":[77,305],"introducing":[78],"an":[79],"improved":[80,308],"training":[85,171],"strategy":[86,221],"based":[87],"on":[88,210,252,260],"knowledge":[89],"distillation":[90],"(KD)":[91],"using":[92],"self-attention":[93],"(SA)":[94],"maps.":[95],"First,":[96],"reduce":[98],"computational":[100],"costs":[101],"latency":[103],"BC-CMT,":[106,114,143],"two-dimensional":[108],"(2D)":[109],"SA":[110,118,128,139,150,178],"operations":[111,129,251],"in":[112,120,148,248],"which":[115],"calculate":[116],"maps":[119,179,226],"frequency\u2013time":[122],"dimensions,":[123],"are":[124,152,161,185],"simplified":[125],"1D":[127],"consider":[131],"only":[132],"temporal":[133],"importance.":[134],"Moreover,":[135],"enhance":[137],"capability":[140],"group":[145],"convolution":[146,217],"layers":[147],"block":[151],"adjusted":[153],"have":[155],"smaller":[156,195],"groups":[159],"combined":[162],"BRL":[165],"operations.":[166],"Second,":[167],"improve":[169],"effectiveness":[172],"modified":[175,204],"pretrained":[182],"large":[183],"BC-CMT-Base":[184],"used":[186],"KD":[189,223,300],"guide":[191],"those":[192],"BC-CMT-Tiny.":[196,269],"Because":[197],"attention":[199,309],"map":[200],"sizes":[201],"models":[206],"do":[207],"not":[208],"depend":[209],"frequency":[214],"bins":[215],"or":[216],"channels,":[218],"enables":[222],"between":[224],"feature":[225],"sizes.":[229],"The":[230,270,298],"experimental":[231],"results":[232],"demonstrate":[233],"BC-CMT-Tiny":[237,279],"having":[239],"271.44K":[240],"parameters":[242],"achieved":[243],"36.8%":[244],"9.3%":[246],"reduction":[247],"floating":[249],"point":[250],"1s":[253],"signals":[254,286],"equal":[256],"error":[257],"rate":[258],"(EER)":[259],"VoxCeleb":[261],"1":[262,282],"testset,":[263],"respectively,":[264],"compared":[265],"conventional":[268],"CPU":[271],"GPU":[273],"running":[274],"time":[275],"ranges":[280],"10":[284],"s":[285],"were":[287],"29.07":[288],"146.32":[290],"ms":[291],"36.01":[293],"206.43":[295],"ms,":[296],"respectively.":[297],"further":[301],"reduced":[302],"EER":[304],"15.5%":[306],"capability.":[310]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
