{"id":"https://openalex.org/W4312866523","doi":"https://doi.org/10.1109/icpr56361.2022.9956552","title":"Deep Speaker Embedding Using Hybrid Network of Multi-Feature Aggregation and Multi-Loss Fusion for TI-SV","display_name":"Deep Speaker Embedding Using Hybrid Network of Multi-Feature Aggregation and Multi-Loss Fusion for TI-SV","publication_year":2022,"publication_date":"2022-08-21","ids":{"openalex":"https://openalex.org/W4312866523","doi":"https://doi.org/10.1109/icpr56361.2022.9956552"},"language":"en","primary_location":{"id":"doi:10.1109/icpr56361.2022.9956552","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956552","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100375302","display_name":"Xiao Li","orcid":"https://orcid.org/0000-0002-7318-7879"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiao Li","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102114460","display_name":"Xiao Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210111085","display_name":"Academy of Broadcasting Science","ror":"https://ror.org/01z4nez64","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210111085"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao Hu","raw_affiliation_strings":["National Radio and Television Administration,Academy of Broadcasting Science,Beijing,China"],"affiliations":[{"raw_affiliation_string":"National Radio and Television Administration,Academy of Broadcasting Science,Beijing,China","institution_ids":["https://openalex.org/I4210111085"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011575768","display_name":"Xiao Chen","orcid":"https://orcid.org/0000-0001-6501-5922"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao Chen","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032993198","display_name":"Hang Pan","orcid":"https://orcid.org/0000-0002-0522-018X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hang Pan","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009353311","display_name":"Kun Niu","orcid":"https://orcid.org/0000-0003-1877-5982"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Niu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100375302"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.2079,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.44268477,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"506","last_page":"512"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7437727451324463},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7343460321426392},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.723128080368042},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6176766157150269},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.6161764860153198},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.59992516040802},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5564440488815308},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5397760272026062},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5391576290130615},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5171727538108826},{"id":"https://openalex.org/keywords/backbone-network","display_name":"Backbone network","score":0.440847247838974},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.43978258967399597},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.4110926389694214},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.16800224781036377}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7437727451324463},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7343460321426392},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.723128080368042},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6176766157150269},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.6161764860153198},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.59992516040802},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5564440488815308},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5397760272026062},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5391576290130615},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5171727538108826},{"id":"https://openalex.org/C88796919","wikidata":"https://www.wikidata.org/wiki/Q1142907","display_name":"Backbone network","level":2,"score":0.440847247838974},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.43978258967399597},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.4110926389694214},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.16800224781036377},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr56361.2022.9956552","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956552","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6899999976158142}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1963882359","https://openalex.org/W1996950552","https://openalex.org/W2041823554","https://openalex.org/W2046056978","https://openalex.org/W2150769028","https://openalex.org/W2271840356","https://openalex.org/W2726515241","https://openalex.org/W2748488820","https://openalex.org/W2755682845","https://openalex.org/W2794506738","https://openalex.org/W2808631503","https://openalex.org/W2892034016","https://openalex.org/W2899771611","https://openalex.org/W2916104401","https://openalex.org/W2922509574","https://openalex.org/W2962898354","https://openalex.org/W2963077989","https://openalex.org/W2963242190","https://openalex.org/W2963371159","https://openalex.org/W2963466847","https://openalex.org/W2967515867","https://openalex.org/W2969985801","https://openalex.org/W2972369255","https://openalex.org/W3009180590","https://openalex.org/W3010925296","https://openalex.org/W3015197287","https://openalex.org/W3015253990","https://openalex.org/W3160804292","https://openalex.org/W3162520476","https://openalex.org/W3198041020","https://openalex.org/W4289750118","https://openalex.org/W6649446422","https://openalex.org/W6694517276","https://openalex.org/W6736780073","https://openalex.org/W6753575415","https://openalex.org/W6756040250","https://openalex.org/W6771297733"],"related_works":["https://openalex.org/W2953234277","https://openalex.org/W2626256601","https://openalex.org/W2900413183","https://openalex.org/W2529301793","https://openalex.org/W2384121599","https://openalex.org/W147410782","https://openalex.org/W4287804464","https://openalex.org/W3022252430","https://openalex.org/W2103897043","https://openalex.org/W3148366653"],"abstract_inverted_index":{"Text-independent":[0],"speaker":[1,24,66,70,97,124],"verification":[2],"(TI-SV)":[3],"refers":[4],"to":[5,87,101,135],"the":[6],"process":[7],"of":[8,27,60],"verifying":[9],"an":[10],"individual\u2019s":[11],"claimed":[12],"identity":[13],"from":[14],"a":[15,41,52,61,91,117,123,132,167],"given":[16],"speech":[17],"utterance":[18],"with":[19,127,171],"unfixed":[20],"content.":[21],"Most":[22],"deep":[23],"embedding":[25,67,98,156],"networks":[26],"TI-SV":[28],"apply":[29],"temporal":[30],"pooling":[31],"or":[32],"similar":[33],"techniques":[34],"for":[35,45,138,152],"frame-level":[36],"feature":[37],"aggregation,":[38],"and":[39,63,69,79,104,130,147],"adopt":[40],"single":[42],"loss":[43,129],"function":[44],"training.":[46],"In":[47,119],"this":[48],"paper,":[49],"we":[50,121],"propose":[51],"powerful":[53],"hybrid":[54,73],"network,":[55],"named":[56],"HN-MFML,":[57],"which":[58,111],"consists":[59],"backbone":[62],"two":[64],"sub-networks:":[65],"extraction":[68,99],"classification.":[71],"The":[72],"network":[74],"not":[75],"only":[76],"incorporates":[77],"global":[78,103],"local":[80,105],"features,":[81],"but":[82],"also":[83,159],"assigns":[84],"adaptive":[85],"weights":[86],"them.":[88],"It":[89],"adopts":[90],"modified":[92],"ResNet-50":[93],"as":[94],"backbone,":[95],"using":[96],"sub-network":[100,126],"aggregate":[102],"features":[106],"adaptively":[107],"in":[108],"frequency-time":[109],"domain,":[110],"can":[112],"be":[113],"trained":[114],"end-to-end":[115],"by":[116,166],"loss.":[118],"addition,":[120],"add":[122],"classification":[125],"another":[128],"explore":[131],"multi-loss":[133,148],"fusion":[134,149],"jointly":[136],"train":[137],"improving":[139],"generalization.":[140],"We":[141,158],"demonstrate":[142],"that":[143,161],"our":[144],"multi-feature":[145],"aggregation":[146],"are":[150],"superior":[151],"obtaining":[153],"discriminative":[154],"utterance-level":[155],"descriptors.":[157],"show":[160],"HN-MFML":[162],"achieves":[163],"state-of-the-art":[164],"performance":[165],"significant":[168],"margin":[169],"compared":[170],"previous":[172],"methods.":[173]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
