{"id":"https://openalex.org/W4413125141","doi":"https://doi.org/10.1109/lsp.2025.3596846","title":"DCF-Net: Efficient Target Speaker Extraction by Leveraging Mixture and Enrollment Interactions","display_name":"DCF-Net: Efficient Target Speaker Extraction by Leveraging Mixture and Enrollment Interactions","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4413125141","doi":"https://doi.org/10.1109/lsp.2025.3596846"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2025.3596846","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2025.3596846","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100751826","display_name":"Ke Xue","orcid":"https://orcid.org/0000-0001-6789-2670"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ke Xue","raw_affiliation_strings":["School of Cyberspace Science and Technology, Beijing Institute of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085947862","display_name":"Rongfei Fan","orcid":"https://orcid.org/0000-0001-8782-0615"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongfei Fan","raw_affiliation_strings":["School of Cyberspace Science and Technology, Beijing Institute of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120315406","display_name":"Changqing Sun","orcid":"https://orcid.org/0009-0005-9050-9699"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Sun","raw_affiliation_strings":["School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035733773","display_name":"Puning Zhao","orcid":"https://orcid.org/0009-0002-3264-3417"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Puning Zhao","raw_affiliation_strings":["School of Cyber Science and Technology, Sun Yat-Sen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Technology, Sun Yat-Sen University, Shenzhen, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044674658","display_name":"Jianping An","orcid":"https://orcid.org/0000-0002-6441-9711"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianping An","raw_affiliation_strings":["School of Cyberspace Science and Technology, Beijing Institute of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100751826"],"corresponding_institution_ids":["https://openalex.org/I125839683"],"apc_list":null,"apc_paid":null,"fwci":4.7137,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.94923334,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"32","issue":null,"first_page":"3240","last_page":"3244"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9639999866485596,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7101120352745056},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5173242092132568},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.4966285824775696},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.41564705967903137},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3964560627937317},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3376382887363434}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7101120352745056},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5173242092132568},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4966285824775696},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.41564705967903137},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3964560627937317},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3376382887363434},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2025.3596846","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2025.3596846","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3424211713","display_name":null,"funder_award_id":"62171034","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1986771495","https://openalex.org/W1991139021","https://openalex.org/W2082183045","https://openalex.org/W2127851351","https://openalex.org/W2136372468","https://openalex.org/W2752782242","https://openalex.org/W2938646939","https://openalex.org/W2951130829","https://openalex.org/W2962866211","https://openalex.org/W2964058413","https://openalex.org/W2972541922","https://openalex.org/W2973062255","https://openalex.org/W3008003372","https://openalex.org/W3015191643","https://openalex.org/W3015199127","https://openalex.org/W3016361963","https://openalex.org/W3096893582","https://openalex.org/W3097653961","https://openalex.org/W3120336970","https://openalex.org/W3158779859","https://openalex.org/W3160085755","https://openalex.org/W3162534564","https://openalex.org/W3196570692","https://openalex.org/W3198234746","https://openalex.org/W4367597591","https://openalex.org/W4372260086","https://openalex.org/W4372271325","https://openalex.org/W4372271367","https://openalex.org/W4385245566","https://openalex.org/W4385822827","https://openalex.org/W4390873946","https://openalex.org/W4391307095","https://openalex.org/W4392902910","https://openalex.org/W4392903066","https://openalex.org/W4400105722","https://openalex.org/W4404102497","https://openalex.org/W4408345595"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W2249138175","https://openalex.org/W2033914206","https://openalex.org/W4313854686","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W2042327336","https://openalex.org/W289407349","https://openalex.org/W2368768466"],"abstract_inverted_index":{"Target":[0],"speaker":[1,22],"extraction":[2,96,135],"(TSE)":[3],"aims":[4],"to":[5,93,137],"isolate":[6],"a":[7,46,63,108],"specific":[8],"speaker\u2019s":[9],"voice":[10],"from":[11,24],"multi-talker":[12],"environments":[13],"using":[14],"enrollment":[15,78],"data.":[16],"While":[17],"current":[18],"approaches":[19],"primarily":[20],"utilize":[21],"embeddings":[23],"enrollment,":[25],"they":[26],"often":[27],"neglect":[28],"contextual":[29,71],"information":[30],"and":[31,38,79,84,87,125],"the":[32,36,56,95,133],"dynamic":[33],"interactions":[34,75],"between":[35,76],"mixture":[37,80],"enrollment.":[39],"To":[40],"address":[41],"this":[42],"limitation,":[43],"we":[44],"propose":[45],"novel":[47],"DualStream":[48,64],"Contextual":[49],"Fusion":[50,65],"Network":[51],"(DCF-Net)":[52],"that":[53,101],"operates":[54],"in":[55,112,123],"time-frequency":[57],"(T-F)":[58],"domain.":[59],"Our":[60],"framework":[61],"introduces":[62],"Block":[66],"(DSFB)":[67],"that:":[68],"1)":[69],"captures":[70],"information,":[72],"2)":[73],"models":[74],"contextualized":[77],"representations":[81,92],"across":[82],"spatial":[83],"channel":[85],"dimensions,":[86],"3)":[88],"employs":[89],"these":[90],"enriched":[91],"guide":[94],"process.":[97],"Comprehensive":[98],"experiments":[99],"show":[100],"DCF-Net":[102],"achieves":[103],"state-of-the-art":[104],"(SOTA)":[105],"performance":[106],"with":[107],"21.6":[109],"dB":[110],"improvement":[111],"scale-invariant":[113],"signal-to-distortion":[114],"ratio":[115],"(SI-SDR)":[116],"on":[117,142],"benchmark":[118],"datasets":[119],"while":[120],"demonstrating":[121],"robustness":[122],"noisy":[124],"reverberant":[126],"conditions.":[127],"Notably,":[128],"our":[129],"model":[130],"significantly":[131],"reduces":[132],"wrong":[134],"rate":[136],"just":[138],"0.4%":[139],"when":[140],"testing":[141],"target":[143],"confusion":[144],"problem":[145],"(TCP),":[146],"underscoring":[147],"its":[148],"practical":[149],"applicability.":[150]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
