{"id":"https://openalex.org/W4392825061","doi":"https://doi.org/10.1109/taslp.2024.3376154","title":"Dual-Channel Target Speaker Extraction Based on Conditional Variational Autoencoder and Directional Information","display_name":"Dual-Channel Target Speaker Extraction Based on Conditional Variational Autoencoder and Directional Information","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4392825061","doi":"https://doi.org/10.1109/taslp.2024.3376154"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3376154","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3376154","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/6633080/10472633.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://ieeexplore.ieee.org/ielx7/6570655/6633080/10472633.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092276174","display_name":"Rui Wang","orcid":"https://orcid.org/0009-0003-0770-9936"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Rui Wang","raw_affiliation_strings":["Graduate School of Informatics, Nagoya University, Aichi, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Nagoya University, Aichi, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061981123","display_name":"Li Li","orcid":"https://orcid.org/0000-0002-3121-7857"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Li Li","raw_affiliation_strings":["Information Technology Center, Nagoya University, Aichi, Japan"],"affiliations":[{"raw_affiliation_string":"Information Technology Center, Nagoya University, Aichi, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078330211","display_name":"Tomoki Toda","orcid":"https://orcid.org/0000-0001-8146-1279"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomoki Toda","raw_affiliation_strings":["Information Technology Center, Nagoya University, Aichi, Japan"],"affiliations":[{"raw_affiliation_string":"Information Technology Center, Nagoya University, Aichi, Japan","institution_ids":["https://openalex.org/I60134161"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5092276174"],"corresponding_institution_ids":["https://openalex.org/I60134161"],"apc_list":null,"apc_paid":null,"fwci":1.7227,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.85793272,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"1968","last_page":"1979"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.9229069948196411},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.5900291800498962},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5839162468910217},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5304194092750549},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.5044420957565308},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.4824676811695099},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.45306405425071716},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3883708715438843},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37620946764945984},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.19101962447166443},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.13705003261566162},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.11774754524230957},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.0843183696269989},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.05169668793678284}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.9229069948196411},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.5900291800498962},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5839162468910217},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5304194092750549},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.5044420957565308},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.4824676811695099},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.45306405425071716},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3883708715438843},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37620946764945984},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.19101962447166443},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.13705003261566162},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.11774754524230957},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0843183696269989},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.05169668793678284},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3376154","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3376154","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/6633080/10472633.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/taslp.2024.3376154","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3376154","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/6633080/10472633.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392825061.pdf","grobid_xml":"https://content.openalex.org/works/W4392825061.grobid-xml"},"referenced_works_count":71,"referenced_works":["https://openalex.org/W1543386260","https://openalex.org/W1845880232","https://openalex.org/W2013096382","https://openalex.org/W2024490156","https://openalex.org/W2068664809","https://openalex.org/W2070497964","https://openalex.org/W2087368178","https://openalex.org/W2093225945","https://openalex.org/W2095450221","https://openalex.org/W2096855653","https://openalex.org/W2098723113","https://openalex.org/W2101609516","https://openalex.org/W2113145093","https://openalex.org/W2113990625","https://openalex.org/W2114461480","https://openalex.org/W2117678320","https://openalex.org/W2119647652","https://openalex.org/W2134910060","https://openalex.org/W2140592340","https://openalex.org/W2147665979","https://openalex.org/W2155112232","https://openalex.org/W2159485234","https://openalex.org/W2159956319","https://openalex.org/W2161270956","https://openalex.org/W2170140777","https://openalex.org/W2171843383","https://openalex.org/W2345067732","https://openalex.org/W2408744528","https://openalex.org/W2412956798","https://openalex.org/W2475682732","https://openalex.org/W2485273068","https://openalex.org/W2766672686","https://openalex.org/W2774830625","https://openalex.org/W2788241093","https://openalex.org/W2789934054","https://openalex.org/W2808718747","https://openalex.org/W2883322837","https://openalex.org/W2891833136","https://openalex.org/W2918296821","https://openalex.org/W2939360348","https://openalex.org/W2943237054","https://openalex.org/W2951431217","https://openalex.org/W2954049404","https://openalex.org/W2962866211","https://openalex.org/W2963082324","https://openalex.org/W2963341071","https://openalex.org/W2963375116","https://openalex.org/W2963933499","https://openalex.org/W2964171275","https://openalex.org/W2972513594","https://openalex.org/W2972516210","https://openalex.org/W2972802573","https://openalex.org/W2973062255","https://openalex.org/W2979850772","https://openalex.org/W3000508686","https://openalex.org/W3008400075","https://openalex.org/W3015233996","https://openalex.org/W3043433769","https://openalex.org/W3097653961","https://openalex.org/W3097756030","https://openalex.org/W4205778870","https://openalex.org/W4214698081","https://openalex.org/W4224918929","https://openalex.org/W4226115251","https://openalex.org/W4233392025","https://openalex.org/W4245919820","https://openalex.org/W4289665794","https://openalex.org/W4312069036","https://openalex.org/W4372347386","https://openalex.org/W6675944832","https://openalex.org/W6810827206"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W2145836866","https://openalex.org/W2803255133","https://openalex.org/W2952912015"],"abstract_inverted_index":{"Target":[0],"speaker":[1],"extraction":[2],"(TSE)":[3],"has":[4],"become":[5],"an":[6,132],"attractive":[7],"research":[8],"topic":[9],"in":[10,196],"recent":[11],"years.":[12],"However,":[13],"TSE":[14,31,48,75],"under":[15,33,59,207],"the":[16,47,51,64,78,100,110,140,149,156,164,169,181,193,215,223],"underdetermined":[17,34,60],"conditions":[18,61],"is":[19,40,57,106,145,226],"still":[20],"a":[21,29,44,67,73,122],"challenge.":[22],"In":[23],"this":[24],"paper,":[25],"we":[26,162,220],"deal":[27],"with":[28,77,192],"dual-channel":[30,74],"problem":[32],"conditions.":[35],"Geometric":[36],"source":[37,69,90],"separation":[38],"(GSS)":[39],"used":[41,119,146],"to":[42,46,108,120,147,154],"be":[43,174],"solution":[45],"problem,":[49],"but":[50],"performance":[52],"of":[53,63,66,81,103,158,198,217],"conventional":[54],"GSS":[55],"methods":[56],"limited":[58],"because":[62],"lack":[65],"powerful":[68,89],"model.":[70],"We":[71],"propose":[72],"method":[76,183,195,225],"combined":[79],"capabilities":[80],"target":[82,101,150,170],"selection":[83],"based":[84],"on":[85,99],"geometric":[86,96],"constraints,":[87],"more":[88],"modeling,":[91],"and":[92,112,126,187,202],"nonlinear":[93],"postprocessing.":[94],"A":[95],"constraint":[97],"(GC)":[98],"direction":[102],"arrival":[104],"(DOA)":[105],"applied":[107],"select":[109],"target,":[111],"two":[113],"conditional":[114],"variational":[115],"autoencoders":[116],"(CVAEs)":[117],"are":[118],"model":[121],"single":[123],"speaker's":[124,151],"speech":[125,144],"interference":[127,142],"mixture":[128,143],"speech.":[129,152],"For":[130],"postprocessing,":[131],"ideal":[133],"ratio":[134,200,204],"time\u2013frequency":[135],"(T\u2013F)":[136],"mask":[137],"estimated":[138],"from":[139],"separated":[141],"extract":[148],"Moreover,":[153],"overcome":[155],"impact":[157],"DOA":[159,171,229],"estimation":[160,230],"errors,":[161],"improve":[163],"objective":[165],"function":[166],"so":[167],"that":[168,180,222],"information":[172],"can":[173],"modified.":[175],"The":[176],"experimental":[177,218],"results":[178],"demonstrate":[179],"proposed":[182],"achieves":[184],"6.24":[185],"dB":[186,189],"8.37":[188],"improvements":[190],"compared":[191],"baseline":[194],"terms":[197],"signal-to-distortion":[199],"(SDR)":[201],"source-to-interference":[203],"(SIR),":[205],"respectively,":[206],"medium":[208],"reverberation":[209],"for":[210],"470":[211],"ms.":[212],"Furthermore,":[213],"through":[214],"analysis":[216],"results,":[219],"found":[221],"improvement":[224],"robust":[227],"against":[228],"errors.":[231]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
