{"id":"https://openalex.org/W4408353733","doi":"https://doi.org/10.1109/icassp49660.2025.10887817","title":"DSINet: Towards Real-Time Target Speaker Extraction with Dynamic Speaker Information Fusion","display_name":"DSINet: Towards Real-Time Target Speaker Extraction with Dynamic Speaker Information Fusion","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408353733","doi":"https://doi.org/10.1109/icassp49660.2025.10887817"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10887817","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887817","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088554529","display_name":"Fengyuan Hao","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fengyuan Hao","raw_affiliation_strings":["Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053757616","display_name":"Andong Li","orcid":"https://orcid.org/0000-0003-4094-8448"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Andong Li","raw_affiliation_strings":["Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100369688","display_name":"Xiaodong Li","orcid":"https://orcid.org/0000-0001-8285-8446"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodong Li","raw_affiliation_strings":["Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070266277","display_name":"Chengshi Zheng","orcid":"https://orcid.org/0000-0001-5656-994X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengshi Zheng","raw_affiliation_strings":["Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5088554529"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210099069"],"apc_list":null,"apc_paid":null,"fwci":2.8599,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.89837178,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.953000009059906,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7667293548583984},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6837260127067566},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6379191875457764},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5710152387619019},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5540593266487122},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.47043368220329285},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4116794466972351},{"id":"https://openalex.org/keywords/information-fusion","display_name":"Information fusion","score":0.410861998796463},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36223214864730835}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7667293548583984},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6837260127067566},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6379191875457764},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5710152387619019},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5540593266487122},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.47043368220329285},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4116794466972351},{"id":"https://openalex.org/C2982962833","wikidata":"https://www.wikidata.org/wiki/Q17092450","display_name":"Information fusion","level":2,"score":0.410861998796463},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36223214864730835},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10887817","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887817","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W1991139021","https://openalex.org/W2118774185","https://openalex.org/W2127851351","https://openalex.org/W2221409856","https://openalex.org/W2516001803","https://openalex.org/W2802023636","https://openalex.org/W2891833136","https://openalex.org/W2952218014","https://openalex.org/W2962866211","https://openalex.org/W2964058413","https://openalex.org/W2973054998","https://openalex.org/W2973062255","https://openalex.org/W3015191643","https://openalex.org/W3016361963","https://openalex.org/W3097653961","https://openalex.org/W3162493033","https://openalex.org/W3162534564","https://openalex.org/W3198234746","https://openalex.org/W4296068983","https://openalex.org/W4367597591","https://openalex.org/W4375869051","https://openalex.org/W4375928773","https://openalex.org/W4385411733","https://openalex.org/W4385756463","https://openalex.org/W4385822827","https://openalex.org/W4385822829","https://openalex.org/W4388620484","https://openalex.org/W4391021760","https://openalex.org/W4392902910","https://openalex.org/W4400105722","https://openalex.org/W4403126636","https://openalex.org/W6631190155","https://openalex.org/W6775369996","https://openalex.org/W6810827206"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W66821593","https://openalex.org/W2149220986","https://openalex.org/W1521299571","https://openalex.org/W1493012537","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W3141593045","https://openalex.org/W2144470400","https://openalex.org/W2911612049"],"abstract_inverted_index":{"Target":[0],"speaker":[1,46,71,80,84,96],"extraction":[2],"(TSE)":[3],"aims":[4],"to":[5,50,129],"directly":[6],"extract":[7],"the":[8,15,44,52,59,65,78,91,102,106,112,119],"desired":[9,107],"speech":[10],"given":[11],"enrollment":[12],"utterances":[13],"of":[14,58,75,94],"target":[16,60,79,95],"speaker.":[17],"Despite":[18],"significant":[19],"progress":[20],"in":[21],"recent":[22],"years,":[23],"most":[24],"existing":[25],"methods":[26,132],"remain":[27],"non-causal":[28,131],"and":[29,54,114,125],"computationally":[30],"intensive.":[31],"This":[32,62],"paper":[33],"introduces":[34],"DSINet,":[35],"a":[36,82],"real-time":[37],"time-frequency":[38],"(T-F)":[39],"domain":[40],"method":[41,63,121],"that":[42,118],"leverages":[43],"dynamic":[45,83],"information":[47,85,97],"fusion":[48,86],"mechanism":[49,87],"estimate":[51],"real":[53],"imaginary":[55],"(RI)":[56],"components":[57],"speech.":[61,108],"incorporates":[64],"T-F":[66],"band-split":[67],"modeling":[68],"as":[69],"primary":[70],"extractor.":[72],"Moreover,":[73],"instead":[74],"explicitly":[76],"calculating":[77],"embedding,":[81],"is":[88],"proposed":[89,120],"for":[90],"efficient":[92],"utilization":[93],"within":[98],"each":[99],"mixture,":[100],"guiding":[101],"backbone":[103],"extractor":[104],"towards":[105],"Experimental":[109],"results":[110],"on":[111],"WSJ0-2mix":[113],"WHAMR!":[115],"datasets":[116],"confirm":[117],"exhibits":[122],"remarkable":[123],"scalability":[124],"achieves":[126],"comparable":[127],"performance":[128],"prominent":[130],"under":[133],"different":[134],"model":[135],"sizes.":[136]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
