{"id":"https://openalex.org/W4381327731","doi":"https://doi.org/10.1109/access.2023.3287860","title":"An Audio-Visual Separation Model Integrating Dual-Channel Attention Mechanism","display_name":"An Audio-Visual Separation Model Integrating Dual-Channel Attention Mechanism","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4381327731","doi":"https://doi.org/10.1109/access.2023.3287860"},"language":"en","primary_location":{"id":"doi:10.1109/access.2023.3287860","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3287860","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10156828.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10156828.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100747973","display_name":"Yutao Zhang","orcid":"https://orcid.org/0000-0002-2552-7736"},"institutions":[{"id":"https://openalex.org/I106079672","display_name":"Hebei University of Engineering","ror":"https://ror.org/036h65h05","country_code":"CN","type":"education","lineage":["https://openalex.org/I106079672"]},{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yutao Zhang","raw_affiliation_strings":["School of Information and Electrical Engineering, Hebei University of Engineering, Handan, Hebei, China","College of Intelligence and Computing, Tianjin University, Tianjin, China"],"raw_orcid":"https://orcid.org/0000-0002-2552-7736","affiliations":[{"raw_affiliation_string":"School of Information and Electrical Engineering, Hebei University of Engineering, Handan, Hebei, China","institution_ids":["https://openalex.org/I106079672"]},{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058253405","display_name":"Kaixing Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I106079672","display_name":"Hebei University of Engineering","ror":"https://ror.org/036h65h05","country_code":"CN","type":"education","lineage":["https://openalex.org/I106079672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaixing Wu","raw_affiliation_strings":["School of Information and Electrical Engineering, Hebei University of Engineering, Handan, Hebei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information and Electrical Engineering, Hebei University of Engineering, Handan, Hebei, China","institution_ids":["https://openalex.org/I106079672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042644591","display_name":"Mengfan Zhao","orcid":"https://orcid.org/0000-0002-2035-2148"},"institutions":[{"id":"https://openalex.org/I106079672","display_name":"Hebei University of Engineering","ror":"https://ror.org/036h65h05","country_code":"CN","type":"education","lineage":["https://openalex.org/I106079672"]},{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengfan Zhao","raw_affiliation_strings":["School of Information and Electrical Engineering, Hebei University of Engineering, Handan, Hebei, China","College of Intelligence and Computing, Tianjin University, Tianjin, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information and Electrical Engineering, Hebei University of Engineering, Handan, Hebei, China","institution_ids":["https://openalex.org/I106079672"]},{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100747973"],"corresponding_institution_ids":["https://openalex.org/I106079672","https://openalex.org/I162868743"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.3805,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.54266592,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"11","issue":null,"first_page":"63069","last_page":"63080"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8250192403793335},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7575185894966125},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.705796480178833},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.7000190019607544},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.571011483669281},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5553641319274902},{"id":"https://openalex.org/keywords/blind-signal-separation","display_name":"Blind signal separation","score":0.5171443819999695},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5078371167182922},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.48852455615997314},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.48525357246398926},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4663841724395752},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4546412229537964},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.4493265748023987},{"id":"https://openalex.org/keywords/digital-audio","display_name":"Digital audio","score":0.43470245599746704},{"id":"https://openalex.org/keywords/signal-to-interference-ratio","display_name":"Signal-to-interference ratio","score":0.41596049070358276},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.41511231660842896},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.3979082703590393},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.23412299156188965},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.10383102297782898}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8250192403793335},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7575185894966125},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.705796480178833},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.7000190019607544},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.571011483669281},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5553641319274902},{"id":"https://openalex.org/C120317606","wikidata":"https://www.wikidata.org/wiki/Q17105967","display_name":"Blind signal separation","level":3,"score":0.5171443819999695},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5078371167182922},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.48852455615997314},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.48525357246398926},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4663841724395752},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4546412229537964},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.4493265748023987},{"id":"https://openalex.org/C87687168","wikidata":"https://www.wikidata.org/wiki/Q173114","display_name":"Digital audio","level":4,"score":0.43470245599746704},{"id":"https://openalex.org/C75613536","wikidata":"https://www.wikidata.org/wiki/Q4138870","display_name":"Signal-to-interference ratio","level":3,"score":0.41596049070358276},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.41511231660842896},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.3979082703590393},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.23412299156188965},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.10383102297782898},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2023.3287860","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3287860","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10156828.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:83508b448f4c406eaa3bb5b78217c775","is_oa":true,"landing_page_url":"https://doaj.org/article/83508b448f4c406eaa3bb5b78217c775","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 11, Pp 63069-63080 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2023.3287860","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3287860","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10156828.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","display_name":"No poverty","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4381327731.pdf","grobid_xml":"https://content.openalex.org/works/W4381327731.grobid-xml"},"referenced_works_count":53,"referenced_works":["https://openalex.org/W1604012244","https://openalex.org/W1790748249","https://openalex.org/W1902027874","https://openalex.org/W1974932989","https://openalex.org/W1991371791","https://openalex.org/W2041498927","https://openalex.org/W2099904336","https://openalex.org/W2120933697","https://openalex.org/W2135029798","https://openalex.org/W2137823674","https://openalex.org/W2143169494","https://openalex.org/W2291877678","https://openalex.org/W2524708987","https://openalex.org/W2533370895","https://openalex.org/W2587994092","https://openalex.org/W2777280533","https://openalex.org/W2777466939","https://openalex.org/W2844030168","https://openalex.org/W2894938704","https://openalex.org/W2918431349","https://openalex.org/W2920377649","https://openalex.org/W2928133111","https://openalex.org/W2959214850","https://openalex.org/W2962865004","https://openalex.org/W2963218389","https://openalex.org/W2963452667","https://openalex.org/W2964171275","https://openalex.org/W2964207404","https://openalex.org/W2981816492","https://openalex.org/W2981851635","https://openalex.org/W2988200020","https://openalex.org/W3000351820","https://openalex.org/W3017343282","https://openalex.org/W3022710784","https://openalex.org/W3087056823","https://openalex.org/W3109606338","https://openalex.org/W3124794156","https://openalex.org/W3125695697","https://openalex.org/W3154807520","https://openalex.org/W3154852953","https://openalex.org/W3195161059","https://openalex.org/W3196208130","https://openalex.org/W3206086363","https://openalex.org/W4213061196","https://openalex.org/W4289665794","https://openalex.org/W4313476633","https://openalex.org/W4327662052","https://openalex.org/W4386590515","https://openalex.org/W6636121673","https://openalex.org/W6680012447","https://openalex.org/W6704477683","https://openalex.org/W6751512325","https://openalex.org/W6850847994"],"related_works":["https://openalex.org/W2970176078","https://openalex.org/W2289868279","https://openalex.org/W1975359510","https://openalex.org/W1496727373","https://openalex.org/W4254699594","https://openalex.org/W2020952589","https://openalex.org/W1526651343","https://openalex.org/W2069094726","https://openalex.org/W2146634268","https://openalex.org/W2088690926"],"abstract_inverted_index":{"Sound":[0],"source":[1,102],"separation":[2,5,89,103,236],"is":[3,172,228],"the":[4,42,53,60,92,105,138,146,153,162,168,175,214],"of":[6,28,44,62,108,186],"targeted":[7],"sounds":[8],"from":[9],"a":[10,67,86],"noisy":[11],"environment,":[12],"which":[13,144,171],"plays":[14],"an":[15],"important":[16],"role":[17],"in":[18,41,66,222,234],"signal":[19,187,191,196],"processing":[20],"and":[21,110,124,131,140,156,195,219,225],"has":[22],"been":[23],"studied":[24],"extensively.":[25],"However,":[26],"most":[27],"these":[29,82],"researches":[30],"only":[31],"extract":[32,150],"audio":[33,111,117,141,157],"information":[34,75],"for":[35,121,179,202],"processing,":[36],"ignoring":[37],"visual":[38,109,139,155],"information,":[39],"resulting":[40,65],"waste":[43],"feature":[45,142],"information.":[46],"In":[47,182],"addition,":[48,183],"some":[49],"researchers":[50],"have":[51,57],"fused":[52],"extracted":[54,154],"features":[55,158],"but":[56],"not":[58],"noticed":[59],"weights":[61],"different":[63],"features,":[64],"poor":[68],"model":[69,90,99,215],"effect.":[70],"This":[71],"paper":[72],"uses":[73],"multi-modal":[74,87],"to":[76,80,127,149,166],"separate":[77],"sound":[78,101],"sources":[79],"solve":[81],"problems.":[83],"We":[84],"constructed":[85],"Audio-Visual":[88],"integrating":[91],"Dual-channel":[93],"Attention":[94],"mechanism":[95,148],"named":[96],"AVDA.":[97],"The":[98,205],"realizes":[100],"through":[104],"dynamic":[106],"fusion":[107,163],"features.":[112,151],"Specifically,":[113],"firstly,":[114],"we":[115],"take":[116],"video":[118,129],"as":[119],"input":[120],"data":[122,210],"preprocessing":[123],"segment":[125],"it":[126],"obtain":[128,167],"frames":[130],"audio.":[132],"Then":[133],"they":[134],"are":[135,159,200],"introduced":[136,160,201],"into":[137,161],"extractors,":[143],"integrate":[145],"attention":[147],"Finally,":[152],"prediction":[164],"component":[165],"predicted":[169],"spectrogram,":[170],"compared":[173],"with":[174],"ground":[176],"truth":[177],"spectrogram":[178],"subjective":[180],"evaluation.":[181],"three":[184,223],"indexes":[185],"distortion":[188],"ratio":[189,193,198],"(SDR),":[190],"interference":[192],"(SIR)":[194],"artifact":[197],"(SAR)":[199],"quantitative":[203],"comparison.":[204],"experimental":[206],"results":[207],"on":[208],"MUSIC-21":[209],"set":[211],"show":[212],"that":[213],"achieves":[216],"10.96,":[217],"17.91":[218],"12.77":[220],"respectively":[221],"indexes,":[224],"its":[226],"performance":[227],"significantly":[229],"better":[230],"than":[231],"other":[232],"models":[233],"audio-visual":[235],"tasks.":[237]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
