{"id":"https://openalex.org/W4312095954","doi":"https://doi.org/10.1109/cisp-bmei56279.2022.9980110","title":"Vision-Guided Speaker Embedding Based Speech Separation","display_name":"Vision-Guided Speaker Embedding Based Speech Separation","publication_year":2022,"publication_date":"2022-11-05","ids":{"openalex":"https://openalex.org/W4312095954","doi":"https://doi.org/10.1109/cisp-bmei56279.2022.9980110"},"language":"en","primary_location":{"id":"doi:10.1109/cisp-bmei56279.2022.9980110","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cisp-bmei56279.2022.9980110","pdf_url":null,"source":{"id":"https://openalex.org/S4363605502","display_name":"2022 15th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 15th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034561537","display_name":"Yuanjie Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuanjie Deng","raw_affiliation_strings":["School of Control Science and Engineering, Shandong University,Jinan,China","School of Control Science and Engineering, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Control Science and Engineering, Shandong University,Jinan,China","institution_ids":["https://openalex.org/I154099455"]},{"raw_affiliation_string":"School of Control Science and Engineering, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101478527","display_name":"Ying Wei","orcid":"https://orcid.org/0000-0001-8217-4765"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Wei","raw_affiliation_strings":["School of Control Science and Engineering, Shandong University,Jinan,China","School of Control Science and Engineering, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Control Science and Engineering, Shandong University,Jinan,China","institution_ids":["https://openalex.org/I154099455"]},{"raw_affiliation_string":"School of Control Science and Engineering, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5034561537"],"corresponding_institution_ids":["https://openalex.org/I154099455"],"apc_list":null,"apc_paid":null,"fwci":0.2455,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.41390205,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"14","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7913780212402344},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7604601383209229},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7374728918075562},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5390372276306152},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5184585452079773},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5171701312065125},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.48040464520454407},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.47955939173698425},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.45560896396636963},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4552951157093048},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3999711871147156},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.10558217763900757},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.1018279492855072}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7913780212402344},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7604601383209229},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7374728918075562},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5390372276306152},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5184585452079773},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5171701312065125},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.48040464520454407},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.47955939173698425},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.45560896396636963},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4552951157093048},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3999711871147156},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.10558217763900757},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.1018279492855072},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cisp-bmei56279.2022.9980110","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cisp-bmei56279.2022.9980110","pdf_url":null,"source":{"id":"https://openalex.org/S4363605502","display_name":"2022 15th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 15th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7900000214576721}],"awards":[{"id":"https://openalex.org/G5144012530","display_name":null,"funder_award_id":"62171263","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2081144555","https://openalex.org/W2096733369","https://openalex.org/W2120310574","https://openalex.org/W2127851351","https://openalex.org/W2141998673","https://openalex.org/W2143169494","https://openalex.org/W2167734090","https://openalex.org/W2168793898","https://openalex.org/W2194775991","https://openalex.org/W2577762507","https://openalex.org/W2585824449","https://openalex.org/W2734984521","https://openalex.org/W2804093409","https://openalex.org/W2808631503","https://openalex.org/W2883780447","https://openalex.org/W2886232760","https://openalex.org/W2890952074","https://openalex.org/W2962866211","https://openalex.org/W2963082324","https://openalex.org/W2964171275","https://openalex.org/W2972513594","https://openalex.org/W2972568703","https://openalex.org/W2973062255","https://openalex.org/W3000647100","https://openalex.org/W3004146833","https://openalex.org/W3011424113","https://openalex.org/W3015445830","https://openalex.org/W3024147341","https://openalex.org/W3096214032","https://openalex.org/W3123318516","https://openalex.org/W3160672713","https://openalex.org/W3173382920","https://openalex.org/W3182657421","https://openalex.org/W3183230691","https://openalex.org/W6639824700","https://openalex.org/W6684458083","https://openalex.org/W6749863746","https://openalex.org/W6750169759","https://openalex.org/W6753767121","https://openalex.org/W6754392867","https://openalex.org/W6755462816"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2162158162","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2938358845","https://openalex.org/W2997340161"],"abstract_inverted_index":{"Speech":[0],"is":[1,38,50,117,134,137],"more":[2,25,138],"affected":[3],"by":[4],"the":[5,10,15,41,53,68,73,81,85,96,109,113,148,154,157],"environment":[6],"and":[7,21,79,152],"noise,":[8],"while":[9],"visual":[11,56],"information":[12],"corresponding":[13],"to":[14,63,111],"speaker,":[16],"such":[17],"as":[18],"lip":[19],"movement":[20],"facial":[22],"appearance":[23],"are":[24],"robust.":[26],"In":[27],"this":[28],"paper,":[29],"a":[30,76,89,118],"vision-guided":[31],"speaker":[32,48,65,124],"embedding":[33,49,125],"based":[34],"speech":[35,45,71,83,107,114,133],"separation":[36,110],"framework":[37],"proposed":[39,60,158],"for":[40],"scenario":[42],"of":[43,55,72,98,156],"mixed":[44,132],"separation.":[46],"The":[47,92],"integrated":[51],"on":[52,147],"basis":[54],"guidance.":[57],"Specifically,":[58],"we":[59],"two":[61],"schemes":[62],"extract":[64,112],"embedding:":[66],"using":[67,80,99],"clean":[69,100,106],"additional":[70,101],"speakers":[74],"in":[75,88,140],"one-stage":[77],"network,":[78],"separated":[82],"at":[84],"first":[86],"stage":[87],"two-stage":[90,93],"network.":[91],"scheme":[94],"avoids":[95],"limitation":[97],"speech.":[102],"It":[103],"utilizes":[104],"gradually":[105],"during":[108],"information,":[115],"which":[116],"continuous":[119],"self-improvement":[120],"process.":[121],"Therefore,":[122],"effective":[123],"can":[126],"be":[127],"extracted":[128],"even":[129],"when":[130],"only":[131],"present.":[135],"This":[136],"practical":[139],"real-world":[141],"scenarios.":[142],"We":[143],"conducted":[144],"comparative":[145],"experiments":[146],"public":[149],"dataset":[150],"VoxCeleb2":[151],"demonstrated":[153],"effectiveness":[155],"method.":[159]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
