{"id":"https://openalex.org/W3008400075","doi":"https://doi.org/10.1109/asru46091.2019.9003983","title":"Time Domain Audio Visual Speech Separation","display_name":"Time Domain Audio Visual Speech Separation","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3008400075","doi":"https://doi.org/10.1109/asru46091.2019.9003983","mag":"3008400075"},"language":"en","primary_location":{"id":"doi:10.1109/asru46091.2019.9003983","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru46091.2019.9003983","pdf_url":null,"source":{"id":"https://openalex.org/S4306498489","display_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101674460","display_name":"Jian Wu","orcid":"https://orcid.org/0000-0002-3101-7011"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jian Wu","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101467338","display_name":"Yong Xu","orcid":"https://orcid.org/0000-0003-4944-6890"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yong Xu","raw_affiliation_strings":["Tencent AI Lab, Bellevue, USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Bellevue, USA","institution_ids":["https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056567731","display_name":"Shixiong Zhang","orcid":"https://orcid.org/0000-0002-0314-9199"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shi-Xiong Zhang","raw_affiliation_strings":["Tencent AI Lab, Bellevue, USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Bellevue, USA","institution_ids":["https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072495935","display_name":"Lianwu Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lian-Wu Chen","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106407019","display_name":"Meng Yu","orcid":"https://orcid.org/0000-0002-0031-9156"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Meng Yu","raw_affiliation_strings":["Tencent AI Lab, Bellevue, USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Bellevue, USA","institution_ids":["https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100668966","display_name":"Lei Xie","orcid":"https://orcid.org/0000-0001-8234-0823"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Xie","raw_affiliation_strings":["School of Computer Science, Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Tencent AI Lab, Bellevue, USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Bellevue, USA","institution_ids":["https://openalex.org/I4210108985"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101674460"],"corresponding_institution_ids":["https://openalex.org/I2250653659"],"apc_list":null,"apc_paid":null,"fwci":10.018,"has_fulltext":false,"cited_by_count":113,"citation_normalized_percentile":{"value":0.98874011,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"667","last_page":"673"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8162651062011719},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7129684686660767},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.6370186805725098},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5612162351608276},{"id":"https://openalex.org/keywords/monaural","display_name":"Monaural","score":0.5549517273902893},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.5093704462051392},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4934372007846832},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4835529029369354},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.45844152569770813},{"id":"https://openalex.org/keywords/frequency-domain","display_name":"Frequency domain","score":0.441353440284729},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4165516495704651},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3555786609649658},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.3383990526199341},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.19501957297325134},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.14737877249717712}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8162651062011719},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7129684686660767},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.6370186805725098},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5612162351608276},{"id":"https://openalex.org/C102894143","wikidata":"https://www.wikidata.org/wiki/Q1323979","display_name":"Monaural","level":2,"score":0.5549517273902893},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.5093704462051392},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4934372007846832},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4835529029369354},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.45844152569770813},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.441353440284729},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4165516495704651},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3555786609649658},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.3383990526199341},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.19501957297325134},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.14737877249717712},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru46091.2019.9003983","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru46091.2019.9003983","pdf_url":null,"source":{"id":"https://openalex.org/S4306498489","display_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1482149378","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1836465849","https://openalex.org/W2194775991","https://openalex.org/W2221409856","https://openalex.org/W2291877678","https://openalex.org/W2531409750","https://openalex.org/W2551572271","https://openalex.org/W2558649592","https://openalex.org/W2734774145","https://openalex.org/W2749510669","https://openalex.org/W2883383043","https://openalex.org/W2890952074","https://openalex.org/W2894785362","https://openalex.org/W2949117887","https://openalex.org/W2952218014","https://openalex.org/W2952746495","https://openalex.org/W2962715207","https://openalex.org/W2962788625","https://openalex.org/W2962866211","https://openalex.org/W2962905190","https://openalex.org/W2962935966","https://openalex.org/W2963082324","https://openalex.org/W2963317762","https://openalex.org/W2963528589","https://openalex.org/W2964058413","https://openalex.org/W2964121744","https://openalex.org/W2964171275","https://openalex.org/W2964207404","https://openalex.org/W2964238697","https://openalex.org/W2972756321","https://openalex.org/W2973062255","https://openalex.org/W3099330747","https://openalex.org/W3123318516","https://openalex.org/W6631362777","https://openalex.org/W6754392867"],"related_works":["https://openalex.org/W2496295964","https://openalex.org/W2336887028","https://openalex.org/W1984921740","https://openalex.org/W2070982348","https://openalex.org/W2365485488","https://openalex.org/W1976952689","https://openalex.org/W2910493550","https://openalex.org/W2231892291","https://openalex.org/W1911859126","https://openalex.org/W642007152"],"abstract_inverted_index":{"Audio-visual":[0],"multi-modal":[1,48,85],"modeling":[2],"has":[3],"been":[4],"demonstrated":[5],"to":[6,46,62,120],"be":[7],"effective":[8],"in":[9],"many":[10],"speech":[11,16,19,43,58],"related":[12],"tasks,":[13],"such":[14],"as":[15],"recognition":[17],"and":[18,50,88,109,115,123],"enhancement.":[20],"This":[21],"paper":[22],"introduces":[23],"a":[24,74,84],"new":[25],"time-domain":[26],"audio-visual":[27,57,125],"architecture":[28,37,69],"for":[29],"target":[30],"speaker":[31],"extraction":[32],"from":[33,60,81],"monaural":[34],"mixtures.":[35],"The":[36,64],"generalizes":[38],"the":[39,55],"previous":[40],"TasNet":[41,122],"(time-domain":[42],"separation":[44,59,86],"network)":[45],"enable":[47],"learning":[49],"at":[51],"meanwhile":[52],"it":[53],"extends":[54],"classical":[56],"frequency-domain":[61,124],"time-domain.":[63],"main":[65],"components":[66],"of":[67],"proposed":[68],"include":[70],"an":[71,89],"audio":[72,90],"encoder,":[73],"video":[75,82],"encoder":[76],"that":[77,103],"extracts":[78],"lip":[79],"embedding":[80],"streams,":[83],"network":[87],"decoder.":[91],"Experiments":[92],"on":[93,97,113],"simulated":[94],"mixtures":[95],"based":[96],"recently":[98],"released":[99],"LRS2":[100],"dataset":[101],"show":[102],"our":[104],"method":[105],"can":[106],"bring":[107],"3dB+":[108],"4dB+":[110],"Si-SNR":[111],"improvements":[112],"two-":[114],"three-speaker":[116],"cases":[117],"respectively,":[118],"compared":[119],"audio-only":[121],"networks.":[126]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":22},{"year":2022,"cited_by_count":20},{"year":2021,"cited_by_count":17},{"year":2020,"cited_by_count":20},{"year":2019,"cited_by_count":3}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
