{"id":"https://openalex.org/W4402592533","doi":"https://doi.org/10.1109/taslp.2024.3463411","title":"Audio-Visual Fusion With Temporal Convolutional Attention Network for Speech Separation","display_name":"Audio-Visual Fusion With Temporal Convolutional Attention Network for Speech Separation","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4402592533","doi":"https://doi.org/10.1109/taslp.2024.3463411"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3463411","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3463411","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032068623","display_name":"Debang Liu","orcid":"https://orcid.org/0000-0002-7411-9683"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Debang Liu","raw_affiliation_strings":["School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005264903","display_name":"Tianqi Zhang","orcid":"https://orcid.org/0000-0003-2160-7701"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianqi Zhang","raw_affiliation_strings":["School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026658144","display_name":"Mads Gr\u00e6sb\u00f8ll Christensen","orcid":"https://orcid.org/0000-0003-3586-7969"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Mads Gr\u00e6sb\u00f8ll Christensen","raw_affiliation_strings":["Audio Analysis Laboratory, CREATE, Aalborg University, Aalborg, Denmark"],"affiliations":[{"raw_affiliation_string":"Audio Analysis Laboratory, CREATE, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011911524","display_name":"Chen Yi","orcid":"https://orcid.org/0000-0002-2844-5787"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Yi","raw_affiliation_strings":["School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076399232","display_name":"Zeliang An","orcid":"https://orcid.org/0000-0002-9947-9634"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeliang An","raw_affiliation_strings":["School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5032068623"],"corresponding_institution_ids":["https://openalex.org/I10535382"],"apc_list":null,"apc_paid":null,"fwci":2.3957,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.89836795,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"4647","last_page":"4660"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.8218266367912292},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6298621892929077},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5591050386428833},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5413839817047119},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.5371305346488953},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.47973814606666565},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.324639230966568},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.21208810806274414},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.07546728849411011},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07449710369110107}],"concepts":[{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.8218266367912292},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6298621892929077},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5591050386428833},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5413839817047119},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.5371305346488953},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.47973814606666565},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.324639230966568},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.21208810806274414},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.07546728849411011},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07449710369110107},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2024.3463411","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3463411","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:publications/353bd9d5-2703-4e8b-ab48-8a9350c9cfbe","is_oa":false,"landing_page_url":"https://vbn.aau.dk/da/publications/353bd9d5-2703-4e8b-ab48-8a9350c9cfbe","pdf_url":null,"source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Liu, D, Zhang, T, Christensen, M G, Yi, C & An, Z 2024, 'Audio-Visual Fusion With Temporal Convolutional Attention Network for Speech Separation', IEEE/ACM Transactions on Audio, Speech, and Language Processing, vol. 32, pp. 4647-4660. https://doi.org/10.1109/TASLP.2024.3463411","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1270112442","display_name":null,"funder_award_id":"61702065","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G281325732","display_name":null,"funder_award_id":"61771085","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2926930228","display_name":null,"funder_award_id":"61671095","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G59648577","display_name":null,"funder_award_id":"CSTC2009CA2003","funder_id":"https://openalex.org/F4320328718","funder_display_name":"Chongqing Municipal Key Laboratory of Signal and Information Processing"},{"id":"https://openalex.org/G603079131","display_name":null,"funder_award_id":"62201113","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7086527820","display_name":null,"funder_award_id":"61701067","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7793339091","display_name":null,"funder_award_id":"cstc2021jcyj-msxmX0836","funder_id":"https://openalex.org/F4320323172","funder_display_name":"Natural Science Foundation of Chongqing"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323172","display_name":"Natural Science Foundation of Chongqing","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320328718","display_name":"Chongqing Municipal Key Laboratory of Signal and Information Processing","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":71,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W1677182931","https://openalex.org/W1790748249","https://openalex.org/W1973681148","https://openalex.org/W1991139021","https://openalex.org/W2002621203","https://openalex.org/W2011301426","https://openalex.org/W2015143272","https://openalex.org/W2029199293","https://openalex.org/W2031647436","https://openalex.org/W2069681747","https://openalex.org/W2081144555","https://openalex.org/W2088079940","https://openalex.org/W2127851351","https://openalex.org/W2141998673","https://openalex.org/W2187089797","https://openalex.org/W2219249508","https://openalex.org/W2221409856","https://openalex.org/W2395263859","https://openalex.org/W2460742184","https://openalex.org/W2510642588","https://openalex.org/W2531409750","https://openalex.org/W2550143307","https://openalex.org/W2577762507","https://openalex.org/W2734774145","https://openalex.org/W2734984521","https://openalex.org/W2735663686","https://openalex.org/W2777466939","https://openalex.org/W2785860907","https://openalex.org/W2788241093","https://openalex.org/W2792643794","https://openalex.org/W2804093409","https://openalex.org/W2808631503","https://openalex.org/W2844030168","https://openalex.org/W2888868298","https://openalex.org/W2890952074","https://openalex.org/W2952218014","https://openalex.org/W2959214850","https://openalex.org/W2962715207","https://openalex.org/W2962866211","https://openalex.org/W2962935966","https://openalex.org/W2963082324","https://openalex.org/W2963307811","https://openalex.org/W2963654155","https://openalex.org/W2964058413","https://openalex.org/W2964171275","https://openalex.org/W2964207404","https://openalex.org/W2972513594","https://openalex.org/W3008400075","https://openalex.org/W3015199127","https://openalex.org/W3020804303","https://openalex.org/W3024147341","https://openalex.org/W3098686642","https://openalex.org/W3099330747","https://openalex.org/W3116298410","https://openalex.org/W3124794156","https://openalex.org/W3124972797","https://openalex.org/W3163652268","https://openalex.org/W3182657421","https://openalex.org/W3206008172","https://openalex.org/W4289665794","https://openalex.org/W4295185249","https://openalex.org/W4297775537","https://openalex.org/W4311088617","https://openalex.org/W4322730862","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6688816777","https://openalex.org/W6737664043","https://openalex.org/W6749029207"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731","https://openalex.org/W2393726419"],"abstract_inverted_index":{"Currently,":[0],"audio-visual":[1,39,50,71,75,158,169],"speech":[2,17,72,83],"separation":[3,73,84,130,179,211],"methods":[4,24],"utilize":[5],"the":[6,16,19,27,37,58,99,104,116,127,140,144,153,157,167,178,186],"speaker's":[7,59],"audio":[8,108,145],"and":[9,109,112,123,146,149,164,184,201,221],"visual":[10,110,147],"correlation":[11],"information":[12,142],"to":[13,35,44,57,102,119,137,155,177,181],"help":[14],"separate":[15,185],"of":[18,29,143,188],"target":[20],"speaker.":[21,190],"However,":[22],"these":[23],"commonly":[25],"use":[26,113,134,152],"approach":[28],"feature":[30,159],"concatenation":[31],"with":[32,77,161],"linear":[33],"mapping":[34],"obtain":[36],"fused":[38,168],"features,":[40],"which":[41],"prompts":[42],"us":[43],"conduct":[45],"a":[46,67],"deeper":[47],"exploration":[48],"for":[49,82],"fusion.":[51],"Therefore,":[52],"in":[53,218],"this":[54,88,192],"paper,":[55],"according":[56],"mouth":[60],"landmark":[61],"movements":[62],"during":[63],"speech,":[64],"we":[65,90,132,151],"propose":[66],"novel":[68],"time-domain":[69],"single-channel":[70],"method:":[74],"fusion":[76,124],"temporal":[78,92,162],"convolution":[79,93],"attention":[80,94,100,136],"network":[81,95,180],"model":[85,103,222],"(AVTCA).":[86],"In":[87,126],"method,":[89],"design":[91],"(TCANet)":[96],"based":[97],"on":[98,139,197],"mechanism":[101],"contextual":[105],"relationships":[106],"between":[107],"sequences,":[111,148],"TCANet":[114,154],"as":[115,175],"basic":[117],"unit":[118],"construct":[120],"sequence":[121],"learning":[122],"network.":[125],"whole":[128],"deep":[129],"framework,":[131],"first":[133],"cross":[135],"focus":[138],"cross-correlation":[141],"then":[150],"fuse":[156],"sequences":[160,171],"dependencies":[163],"cross-correlations.":[165],"Afterwards,":[166],"features":[170],"will":[172],"be":[173],"used":[174],"input":[176],"predict":[182],"mask":[183],"source":[187],"each":[189],"Finally,":[191],"paper":[193],"conducts":[194],"comparative":[195],"experiments":[196],"Vox2,":[198],"GRID,":[199],"LRS2":[200],"TCD-TIMIT":[202],"datasets,":[203],"indicating":[204],"that":[205],"AVTCA":[206],"outperforms":[207],"other":[208],"state-of-the-art":[209],"(SOTA)":[210],"methods.":[212],"Furthermore,":[213],"it":[214],"exhibits":[215],"greater":[216],"efficiency":[217],"computational":[219],"performance":[220],"size.":[223]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
