{"id":"https://openalex.org/W4402351066","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650374","title":"A Novel Loss Incorporating Residual Signal Information for Target Speaker Extraction Under Low-SNR Conditions","display_name":"A Novel Loss Incorporating Residual Signal Information for Target Speaker Extraction Under Low-SNR Conditions","publication_year":2024,"publication_date":"2024-06-30","ids":{"openalex":"https://openalex.org/W4402351066","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650374"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn60899.2024.10650374","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10650374","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100712619","display_name":"Sijie Wang","orcid":"https://orcid.org/0000-0002-2912-0773"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wang Sijie","raw_affiliation_strings":["Xinjiang University,School of Computer Science and Technology,Urumqi,China"],"affiliations":[{"raw_affiliation_string":"Xinjiang University,School of Computer Science and Technology,Urumqi,China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075133420","display_name":"Askar Hamdulla","orcid":"https://orcid.org/0000-0002-2321-308X"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Askar Hamdulla","raw_affiliation_strings":["Xinjiang University,School of Computer Science and Technology,Urumqi,China"],"affiliations":[{"raw_affiliation_string":"Xinjiang University,School of Computer Science and Technology,Urumqi,China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023562009","display_name":"Mijit Ablimit","orcid":"https://orcid.org/0009-0008-0781-4150"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mijit Ablimit","raw_affiliation_strings":["Xinjiang University,School of Computer Science and Technology,Urumqi,China"],"affiliations":[{"raw_affiliation_string":"Xinjiang University,School of Computer Science and Technology,Urumqi,China","institution_ids":["https://openalex.org/I96908189"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100712619"],"corresponding_institution_ids":["https://openalex.org/I96908189"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16265996,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6998152732849121},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6925910115242004},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5611927509307861},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.47176840901374817},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.47066912055015564},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.44424712657928467},{"id":"https://openalex.org/keywords/signal-to-noise-ratio","display_name":"Signal-to-noise ratio (imaging)","score":0.4391654431819916},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4347136914730072},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3924703001976013},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3284943103790283},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.21600571274757385},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1524253487586975}],"concepts":[{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6998152732849121},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6925910115242004},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5611927509307861},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.47176840901374817},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.47066912055015564},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.44424712657928467},{"id":"https://openalex.org/C13944312","wikidata":"https://www.wikidata.org/wiki/Q7512748","display_name":"Signal-to-noise ratio (imaging)","level":2,"score":0.4391654431819916},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4347136914730072},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3924703001976013},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3284943103790283},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.21600571274757385},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1524253487586975},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn60899.2024.10650374","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10650374","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W133022121","https://openalex.org/W1485161427","https://openalex.org/W1845880232","https://openalex.org/W2087368178","https://openalex.org/W2104969945","https://openalex.org/W2124149378","https://openalex.org/W2135029798","https://openalex.org/W2167873188","https://openalex.org/W2221409856","https://openalex.org/W2237671050","https://openalex.org/W2344317287","https://openalex.org/W2460742184","https://openalex.org/W2887067836","https://openalex.org/W2951130829","https://openalex.org/W2952218014","https://openalex.org/W2973062255","https://openalex.org/W3015526955","https://openalex.org/W3092562885","https://openalex.org/W3099330747","https://openalex.org/W3113290170","https://openalex.org/W4233392025","https://openalex.org/W4247773827","https://openalex.org/W4296068983","https://openalex.org/W4297798711","https://openalex.org/W4297798768","https://openalex.org/W4312751558","https://openalex.org/W4321497812","https://openalex.org/W4323767251","https://openalex.org/W4367190594","https://openalex.org/W4375869348","https://openalex.org/W4376456999","https://openalex.org/W4381327724","https://openalex.org/W4392903118","https://openalex.org/W6680012447"],"related_works":["https://openalex.org/W2560215812","https://openalex.org/W2949601986","https://openalex.org/W2788972299","https://openalex.org/W2498789492","https://openalex.org/W2521347458","https://openalex.org/W2729981612","https://openalex.org/W4233449973","https://openalex.org/W2925692864","https://openalex.org/W2768526084","https://openalex.org/W1560125148"],"abstract_inverted_index":{"Most":[0],"target":[1],"speaker":[2],"extraction":[3],"(TSE)":[4],"methods":[5,88,131,146],"primarily":[6],"focus":[7],"on":[8],"scenarios":[9],"characterized":[10],"by":[11,69],"conventional":[12],"signal-to-noise":[13],"ratio":[14],"(SNR),":[15],"overlooking":[16],"the":[17,21,41,45,63,93,115,122,129,136,139,143,149,154],"substantial":[18],"decline":[19],"in":[20],"quality":[22],"of":[23,53,65,73,142,151],"extracted":[24],"speech":[25],"under":[26],"low-SNR":[27],"conditions.":[28],"To":[29],"address":[30],"this":[31],"issue,":[32],"we":[33,61],"propose":[34],"to":[35,40,44,102,106,111,135],"add":[36],"an":[37],"item":[38],"related":[39],"residual":[42,66],"signal":[43,67],"original":[46],"SI-SDR-based":[47],"loss":[48],"function,":[49],"with":[50,96,148],"no":[51],"need":[52],"additional":[54],"training":[55],"labels":[56],"or":[57],"network":[58],"modules.":[59],"Specifically,":[60],"investigate":[62],"impact":[64],"information":[68],"applying":[70],"three":[71],"types":[72],"constraints,":[74],"which":[75],"are":[76,89,119,132],"direction,":[77],"Euclidean":[78,84],"distance,":[79],"and":[80,109,114,138],"projection":[81],"distance":[82],"+":[83],"distance.":[85],"The":[86],"proposed":[87,130],"experimentally":[90],"evaluated":[91],"using":[92],"WSJ0-2mix-extr":[94],"datasets,":[95],"SNR":[97,161],"range":[98],"selected":[99,120],"as":[100,121],"-10":[101,107],"-5":[103],"dB,":[104,113],"-15":[105,112],"dB":[108],"-20":[110],"SpEx":[116],"series":[117],"models":[118],"validation":[123],"models.":[124],"Experimental":[125],"results":[126],"show":[127],"that":[128],"all":[133],"superior":[134],"baseline,":[137],"performance":[140,159],"improvement":[141],"first":[144],"two":[145],"increases":[147],"decrease":[150],"SNR,":[152],"but":[153],"third":[155],"method":[156],"shows":[157],"better":[158],"when":[160],"is":[162],"higher.":[163]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
