{"id":"https://openalex.org/W4296070419","doi":"https://doi.org/10.21437/interspeech.2022-11183","title":"VCSE: Time-Domain Visual-Contextual Speaker Extraction Network","display_name":"VCSE: Time-Domain Visual-Contextual Speaker Extraction Network","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4296070419","doi":"https://doi.org/10.21437/interspeech.2022-11183"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-11183","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11183","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100367683","display_name":"Junjie Li","orcid":"https://orcid.org/0009-0004-8450-7112"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junjie Li","raw_affiliation_strings":["Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100579172","display_name":"Meng Ge","orcid":null},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]},{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["CN","SG"],"is_corresponding":false,"raw_author_name":"Meng Ge","raw_affiliation_strings":["Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","Department of Electrical and Computer Engineering, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060530570","display_name":"Zexu Pan","orcid":"https://orcid.org/0000-0002-8106-1176"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Zexu Pan","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101745213","display_name":"Longbiao Wang","orcid":"https://orcid.org/0000-0002-8094-6861"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longbiao Wang","raw_affiliation_strings":["Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017251198","display_name":"Jianwu Dang","orcid":"https://orcid.org/0000-0002-9237-4821"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]},{"id":"https://openalex.org/I177738480","display_name":"Japan Advanced Institute of Science and Technology","ror":"https://ror.org/03frj4r98","country_code":"JP","type":"education","lineage":["https://openalex.org/I177738480"]}],"countries":["CN","JP"],"is_corresponding":false,"raw_author_name":"Jianwu Dang","raw_affiliation_strings":["Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","Japan Advanced Institute of Science and Technology, Ishikawa, Japan"],"affiliations":[{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Japan Advanced Institute of Science and Technology, Ishikawa, Japan","institution_ids":["https://openalex.org/I177738480"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100367683"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":0.9719,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.77242682,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8161025643348694},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5283637046813965},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.5223192572593689},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4608250856399536},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4535655677318573},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3210465908050537},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.060281336307525635}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8161025643348694},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5283637046813965},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.5223192572593689},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4608250856399536},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4535655677318573},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3210465908050537},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.060281336307525635},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-11183","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11183","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5088964179","display_name":null,"funder_award_id":"T00120220002","funder_id":"https://openalex.org/F4320331102","funder_display_name":"Shenzhen Research Institute of Big Data"},{"id":"https://openalex.org/G861645084","display_name":null,"funder_award_id":"B10120210117-KP02","funder_id":"https://openalex.org/F4320322942","funder_display_name":"Chinese University of Hong Kong"}],"funders":[{"id":"https://openalex.org/F4320322942","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48"},{"id":"https://openalex.org/F4320331102","display_name":"Shenzhen Research Institute of Big Data","ror":"https://ror.org/00z1gwf89"},{"id":"https://openalex.org/F4320333998","display_name":"Shenzhen Research Institute, City University of Hong Kong","ror":"https://ror.org/00xc0ma20"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W185399533","https://openalex.org/W1932061758","https://openalex.org/W2035576074","https://openalex.org/W2099731107","https://openalex.org/W2127851351","https://openalex.org/W2135823751","https://openalex.org/W2221409856","https://openalex.org/W2460742184","https://openalex.org/W2736232392","https://openalex.org/W2803322398","https://openalex.org/W2891205112","https://openalex.org/W2913642561","https://openalex.org/W2952218014","https://openalex.org/W2963654155","https://openalex.org/W2964058413","https://openalex.org/W2973062255","https://openalex.org/W3008400075","https://openalex.org/W3015225820","https://openalex.org/W3095379519","https://openalex.org/W3096936581","https://openalex.org/W3097450720","https://openalex.org/W3097653961","https://openalex.org/W3103434036","https://openalex.org/W3112929926","https://openalex.org/W3123318516","https://openalex.org/W3124666641","https://openalex.org/W3160005627","https://openalex.org/W3163287738","https://openalex.org/W3181255780","https://openalex.org/W3185109982","https://openalex.org/W4223499953","https://openalex.org/W4224918929","https://openalex.org/W4226338831","https://openalex.org/W4285662695","https://openalex.org/W4295308317","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2377297411","https://openalex.org/W3148217948","https://openalex.org/W2375788636","https://openalex.org/W2358561207","https://openalex.org/W2388704129","https://openalex.org/W2392827053","https://openalex.org/W2975617233","https://openalex.org/W2377877252","https://openalex.org/W2362914816","https://openalex.org/W3192589309"],"abstract_inverted_index":{"Speaker":[0],"extraction":[1,73],"seeks":[2],"to":[3,45,87],"extract":[4],"the":[5,50,94,107,111,116,121,127],"target":[6,51,100,118],"speech":[7,101,119],"in":[8,33,60],"a":[9,21,61,68,99],"multitalker":[10],"scenario":[11],"given":[12],"an":[13],"auxiliary":[14],"reference.Such":[15],"reference":[16],"can":[17],"be":[18,43],"auditory,":[19],"i.e.,":[20,25,30],"pre-recorded":[22],"speech,":[23],"visual,":[24],"lip":[26],"movements,":[27],"or":[28],"contextual,":[29],"phonetic":[31,109],"sequence.References":[32],"different":[34],"modalities":[35,59],"provide":[36],"distinct":[37],"and":[38,57,80,105],"complementary":[39],"information":[40],"that":[41,136],"could":[42],"fused":[44],"form":[46],"top-down":[47],"attention":[48],"on":[49,126],"speaker.Previous":[52],"studies":[53],"have":[54],"introduced":[55],"visual":[56,79,103],"contextual":[58,82,123],"single":[62],"model.In":[63],"this":[64],"paper,":[65],"we":[66,97,114],"propose":[67],"two-stage":[69],"time-domain":[70],"visual-contextual":[71],"speaker":[72],"network":[74,140],"named":[75],"VCSE,":[76],"which":[77],"incorporates":[78],"selfenrolled":[81],"cues":[83,104],"stage":[84,86],"by":[85],"take":[88],"full":[89],"advantage":[90],"of":[91],"every":[92],"modality.In":[93],"first":[95],"stage,":[96,113],"pre-extract":[98],"with":[102,120],"estimate":[106],"underlying":[108],"sequence.In":[110],"second":[112],"refine":[115],"pre-extracted":[117],"self-enrolled":[122],"cues.Experimental":[124],"results":[125],"real-world":[128],"Lip":[129],"Reading":[130],"Sentences":[131],"3":[132],"(LRS3)":[133],"database":[134],"demonstrate":[135],"our":[137],"proposed":[138],"VCSE":[139],"consistently":[141],"outperforms":[142],"other":[143],"state-of-the-art":[144],"baselines.":[145]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":7}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
