{"id":"https://openalex.org/W4401328445","doi":"https://doi.org/10.1109/taslp.2024.3437237","title":"Towards Lightweight Speaker Verification via Adaptive Neural Network Quantization","display_name":"Towards Lightweight Speaker Verification via Adaptive Neural Network Quantization","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4401328445","doi":"https://doi.org/10.1109/taslp.2024.3437237"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3437237","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3437237","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114910830","display_name":"Bei Liu","orcid":"https://orcid.org/0000-0002-6208-003X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bei Liu","raw_affiliation_strings":["Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114910895","display_name":"Haoyu Wang","orcid":"https://orcid.org/0009-0000-0355-7021"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Wang","raw_affiliation_strings":["Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5114910830"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":2.0851,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.88751798,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"3771","last_page":"3784"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9519000053405762,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.82478928565979},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6171278357505798},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6132485270500183},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5566869378089905},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.553005576133728},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5515053272247314},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21010950207710266},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.06534063816070557}],"concepts":[{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.82478928565979},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6171278357505798},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6132485270500183},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5566869378089905},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.553005576133728},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5515053272247314},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21010950207710266},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.06534063816070557}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3437237","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3437237","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5465080215","display_name":null,"funder_award_id":"2021SHZDZX0102","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"}],"funders":[{"id":"https://openalex.org/F4320321885","display_name":"Science and Technology Commission of Shanghai Municipality","ror":"https://ror.org/03kt66j61"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":77,"referenced_works":["https://openalex.org/W1006777433","https://openalex.org/W1589137271","https://openalex.org/W2150769028","https://openalex.org/W2157161740","https://openalex.org/W2194775991","https://openalex.org/W2219249508","https://openalex.org/W2242818861","https://openalex.org/W2469490737","https://openalex.org/W2524428287","https://openalex.org/W2696967604","https://openalex.org/W2726515241","https://openalex.org/W2747238065","https://openalex.org/W2748488820","https://openalex.org/W2786771851","https://openalex.org/W2794506738","https://openalex.org/W2808631503","https://openalex.org/W2889519245","https://openalex.org/W2890964092","https://openalex.org/W2936733796","https://openalex.org/W2936774411","https://openalex.org/W2938358845","https://openalex.org/W2963122961","https://openalex.org/W2963371159","https://openalex.org/W2964279778","https://openalex.org/W2972369255","https://openalex.org/W2972441390","https://openalex.org/W2972986505","https://openalex.org/W2982041622","https://openalex.org/W3010893827","https://openalex.org/W3010925296","https://openalex.org/W3013020904","https://openalex.org/W3024869864","https://openalex.org/W3094325190","https://openalex.org/W3095851463","https://openalex.org/W3135006803","https://openalex.org/W3160076723","https://openalex.org/W3163082428","https://openalex.org/W3198041020","https://openalex.org/W3198685759","https://openalex.org/W3198804855","https://openalex.org/W3205519288","https://openalex.org/W3206287456","https://openalex.org/W3206886023","https://openalex.org/W3217016993","https://openalex.org/W4224917447","https://openalex.org/W4224924217","https://openalex.org/W4226184234","https://openalex.org/W4283458492","https://openalex.org/W4288091954","https://openalex.org/W4296068597","https://openalex.org/W4296068762","https://openalex.org/W4296068831","https://openalex.org/W4297841636","https://openalex.org/W4304481407","https://openalex.org/W4310494058","https://openalex.org/W4312739039","https://openalex.org/W4375798879","https://openalex.org/W4375869345","https://openalex.org/W4385822582","https://openalex.org/W4385823155","https://openalex.org/W4385823498","https://openalex.org/W6638523607","https://openalex.org/W6677580257","https://openalex.org/W6688816777","https://openalex.org/W6690026940","https://openalex.org/W6720242923","https://openalex.org/W6727208969","https://openalex.org/W6734062232","https://openalex.org/W6748224102","https://openalex.org/W6757817989","https://openalex.org/W6769178842","https://openalex.org/W6770425567","https://openalex.org/W6773270737","https://openalex.org/W6776767859","https://openalex.org/W6784256521","https://openalex.org/W6789240164","https://openalex.org/W6839093550"],"related_works":["https://openalex.org/W66821593","https://openalex.org/W4297807400","https://openalex.org/W1491159402","https://openalex.org/W4313854686","https://openalex.org/W321304764","https://openalex.org/W2249138175","https://openalex.org/W1521299571","https://openalex.org/W3162054169","https://openalex.org/W1516392727","https://openalex.org/W2140022733"],"abstract_inverted_index":{"Modern":[0],"speaker":[1,30],"verification":[2],"(SV)":[3],"systems":[4,228],"typically":[5],"demand":[6],"expensive":[7],"storage":[8],"and":[9,116,142,160],"computing":[10],"resources,":[11],"thereby":[12],"hindering":[13],"their":[14],"deployment":[15],"on":[16,56,146,157],"mobile":[17],"devices.":[18],"In":[19,206],"this":[20],"paper,":[21],"we":[22,33,67,125],"explore":[23],"adaptive":[24,37,143],"neural":[25],"network":[26,53,106,119],"quantization":[27,40,48,86,130,154,180,211],"for":[28,51,201],"lightweight":[29,226],"verification.":[31],"Firstly,":[32],"propose":[34],"a":[35,69,83,90,121,163,188,221,239],"novel":[36],"uniform":[38,152,175],"precision":[39,85,153,176,179],"method":[41],"which":[42],"enables":[43],"the":[44,63,118,140,195,215],"dynamic":[45],"generation":[46],"of":[47,71,136,167,217],"centroids":[49],"customized":[50],"each":[52],"layer":[54],"based":[55],"k-means":[57],"clustering.":[58],"By":[59],"applying":[60],"it":[61],"to":[62,104,132,174,197],"pre-trained":[64],"SV":[65,227],"systems,":[66],"obtain":[68],"series":[70],"quantized":[72,81,138],"variants":[73],"with":[74,89,187,224],"different":[75,105],"bit":[76,102,109,199],"widths.":[77],"To":[78],"enhance":[79],"low-bit":[80],"models,":[82],"mixed":[84,178],"algorithm":[87],"along":[88],"multi-stage":[91],"fine-tuning":[92],"(MSFT)":[93],"strategy":[94],"is":[95,155],"further":[96],"introduced.":[97],"This":[98],"approach":[99],"assigns":[100],"varying":[101],"widths":[103],"layers.":[107],"When":[108],"combinations":[110],"are":[111],"determined,":[112],"MSFT":[113],"progressively":[114],"quantizes":[115],"fine-tunes":[117],"in":[120],"specific":[122],"order.":[123],"Finally,":[124,220],"design":[126],"two":[127],"distinct":[128],"binary":[129],"schemes":[131,212],"mitigate":[133],"performance":[134,185,216],"degradation":[135],"1-bit":[137,210],"models:":[139],"static":[141],"quantizers.":[144],"Experiments":[145],"VoxCeleb":[147],"demonstrate":[148],"that":[149,230],"lossless":[150],"4-bit":[151],"achieved":[156],"both":[158],"ResNets":[159],"DF-ResNets,":[161],"yielding":[162],"promising":[164],"compression":[165],"ratio":[166],"<inline-formula":[168],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[169],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[170],"notation=\"LaTeX\">$\\sim$</tex-math></inline-formula>8.":[171],"Moreover,":[172],"compared":[173],"approach,":[177],"not":[181],"only":[182],"obtains":[183],"additional":[184],"improvements":[186],"similar":[189],"model":[190,204,244],"size":[191,245],"but":[192],"also":[193],"offers":[194],"flexibility":[196],"generate":[198],"combination":[200],"any":[202],"desirable":[203],"size.":[205],"addition,":[207],"our":[208,231],"suggested":[209],"remarkably":[213],"boost":[214],"binarized":[218],"models.":[219],"thorough":[222],"comparison":[223],"existing":[225],"reveals":[229],"proposed":[232],"models":[233],"outperform":[234],"all":[235],"previous":[236],"methods":[237],"by":[238],"large":[240],"margin":[241],"across":[242],"various":[243],"ranges.":[246]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
