{"id":"https://openalex.org/W4408353229","doi":"https://doi.org/10.1109/icassp49660.2025.10888890","title":"SoCov: Semi-Orthogonal Parametric Pooling of Covariance Matrix for Speaker Recognition","display_name":"SoCov: Semi-Orthogonal Parametric Pooling of Covariance Matrix for Speaker Recognition","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408353229","doi":"https://doi.org/10.1109/icassp49660.2025.10888890"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10888890","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888890","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2504.16441","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053549702","display_name":"Rongjin Li","orcid":"https://orcid.org/0000-0002-0178-5167"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rongjin Li","raw_affiliation_strings":["VoiceAI Technologies, Co. Ltd,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"VoiceAI Technologies, Co. Ltd,Shenzhen,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100448761","display_name":"Weibin Zhang","orcid":"https://orcid.org/0000-0002-2771-704X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weibin Zhang","raw_affiliation_strings":["VoiceAI Technologies, Co. Ltd,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"VoiceAI Technologies, Co. Ltd,Shenzhen,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004048913","display_name":"Dongpeng Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dongpeng Chen","raw_affiliation_strings":["VoiceAI Technologies, Co. Ltd,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"VoiceAI Technologies, Co. Ltd,Shenzhen,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071873783","display_name":"Jintao Kang","orcid":null},"institutions":[{"id":"https://openalex.org/I1302611135","display_name":"Ministry of Public Security of the People's Republic of China","ror":"https://ror.org/00bt9we26","country_code":"CN","type":"government","lineage":["https://openalex.org/I1302611135"]},{"id":"https://openalex.org/I4210139944","display_name":"Institute of Forensic Science","ror":"https://ror.org/04ry60e05","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210089256","https://openalex.org/I4210127390","https://openalex.org/I4210139944"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jintao Kang","raw_affiliation_strings":["Ministry of Public Security,Institute of Forensic Science,China"],"affiliations":[{"raw_affiliation_string":"Ministry of Public Security,Institute of Forensic Science,China","institution_ids":["https://openalex.org/I1302611135","https://openalex.org/I4210139944"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036301580","display_name":"Xiaofen Xing","orcid":"https://orcid.org/0000-0002-0016-9055"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofen Xing","raw_affiliation_strings":["South China University of Technology,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"South China University of Technology,Guangzhou,China","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5053549702"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01878927,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/covariance-matrix","display_name":"Covariance matrix","score":0.6222364902496338},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5838298201560974},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.5704547166824341},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5616574287414551},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.5399004220962524},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5299922823905945},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5095060467720032},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44095703959465027},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3002163767814636},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.22244933247566223},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.145432710647583}],"concepts":[{"id":"https://openalex.org/C185142706","wikidata":"https://www.wikidata.org/wiki/Q1134404","display_name":"Covariance matrix","level":2,"score":0.6222364902496338},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5838298201560974},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.5704547166824341},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5616574287414551},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.5399004220962524},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5299922823905945},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5095060467720032},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44095703959465027},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3002163767814636},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.22244933247566223},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.145432710647583}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp49660.2025.10888890","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888890","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2504.16441","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.16441","pdf_url":"https://arxiv.org/pdf/2504.16441","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2504.16441","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.16441","pdf_url":"https://arxiv.org/pdf/2504.16441","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4408353229.pdf","grobid_xml":"https://content.openalex.org/works/W4408353229.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1589137271","https://openalex.org/W2046056978","https://openalex.org/W2122364000","https://openalex.org/W2194775991","https://openalex.org/W2219249508","https://openalex.org/W2402146185","https://openalex.org/W2696967604","https://openalex.org/W2726515241","https://openalex.org/W2748488820","https://openalex.org/W2794506738","https://openalex.org/W2888867175","https://openalex.org/W2889519245","https://openalex.org/W2890964092","https://openalex.org/W2897402882","https://openalex.org/W2939634425","https://openalex.org/W2962761264","https://openalex.org/W2963550079","https://openalex.org/W2972369255","https://openalex.org/W3015598461","https://openalex.org/W3016112863","https://openalex.org/W3024869864","https://openalex.org/W3035300720","https://openalex.org/W3095410713","https://openalex.org/W3135006803","https://openalex.org/W3163361649","https://openalex.org/W3194763370","https://openalex.org/W3197392277","https://openalex.org/W4224919627","https://openalex.org/W4283014282","https://openalex.org/W4295312788","https://openalex.org/W4385245566","https://openalex.org/W6631362777","https://openalex.org/W6688816777","https://openalex.org/W6766978945","https://openalex.org/W6800175519"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W2249138175","https://openalex.org/W4313854686","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2368768466","https://openalex.org/W2757081366","https://openalex.org/W3197877226"],"abstract_inverted_index":{"In":[0,52],"conventional":[1,136,167],"deep":[2,152],"speaker":[3],"embedding":[4,107],"frameworks,":[5],"the":[6,57,70,74,85,94,103,124,135,166],"pooling":[7,31,60,68],"layer":[8],"aggregates":[9],"all":[10],"frame-level":[11,45,76],"features":[12,46,77],"over":[13],"time":[14],"and":[15,19,48,78,127],"computes":[16,69],"their":[17],"mean":[18],"standard":[20,96,170],"deviation":[21,97],"statistics":[22,30],"as":[23],"inputs":[24,101],"to":[25,99,102,119,156,165],"subsequent":[26],"segment-level":[27,104],"layers.":[28,105],"Such":[29],"strategy":[32],"produces":[33],"fixed-length":[34],"representations":[35],"from":[36,73],"variable-length":[37],"speech":[38],"segments.":[39],"However,":[40],"this":[41,53],"method":[42],"treats":[43],"different":[44,121],"equally":[47],"discards":[49],"covariance":[50,71],"information.":[51],"paper,":[54],"we":[55],"propose":[56],"Semi-orthogonal":[58],"parameter":[59],"of":[61,145],"Covariance":[62],"matrix":[63,72],"(SoCov)":[64],"method.":[65],"The":[66,114,130],"SoCov":[67,110,154],"self-attentive":[75,151],"compresses":[79],"it":[80],"into":[81],"a":[82,140],"vector":[83,98],"using":[84,150],"semi-orthogonal":[86],"parametric":[87],"vectorization,":[88],"which":[89],"is":[90,111,117],"then":[91],"concatenated":[92],"with":[93,139],"weighted":[95],"form":[100],"Deep":[106],"based":[108],"on":[109,123,147,159],"called":[112],"\"sc-vector\".":[113],"proposed":[115],"sc-vector":[116,131],"compared":[118],"several":[120],"baselines":[122],"SRE21":[125],"development":[126],"evaluation":[128],"sets.":[129],"system":[132],"significantly":[133],"outperforms":[134],"x-vector":[137],"system,":[138],"relative":[141],"reduction":[142],"in":[143],"EER":[144,158],"15.5%":[146],"SRE21Eval.":[148],"When":[149],"feature,":[153],"helps":[155],"reduce":[157],"SRE21Eval":[160],"by":[161],"about":[162],"30.9%":[163],"relatively":[164],"\"mean":[168],"+":[169],"deviation\"":[171],"statistics.":[172]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
