{"id":"https://openalex.org/W4403780811","doi":"https://doi.org/10.1145/3664647.3681550","title":"MSFNet: Multi-Scale Fusion Network for Brain-Controlled Speaker Extraction","display_name":"MSFNet: Multi-Scale Fusion Network for Brain-Controlled Speaker Extraction","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403780811","doi":"https://doi.org/10.1145/3664647.3681550"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681550","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681550","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037493212","display_name":"Cunhang Fan","orcid":"https://orcid.org/0000-0001-6318-8803"},"institutions":[{"id":"https://openalex.org/I143868143","display_name":"Anhui University","ror":"https://ror.org/05th6yx34","country_code":"CN","type":"education","lineage":["https://openalex.org/I143868143"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Cunhang Fan","raw_affiliation_strings":["School of Computer Science and Technology, Anhui University, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Anhui University, Hefei, China","institution_ids":["https://openalex.org/I143868143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107953244","display_name":"Jingjing Zhang","orcid":"https://orcid.org/0009-0002-6662-0897"},"institutions":[{"id":"https://openalex.org/I143868143","display_name":"Anhui University","ror":"https://ror.org/05th6yx34","country_code":"CN","type":"education","lineage":["https://openalex.org/I143868143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingjing Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Anhui University, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Anhui University, Hefei, China","institution_ids":["https://openalex.org/I143868143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023999422","display_name":"Hongyu Zhang","orcid":"https://orcid.org/0009-0006-0032-7566"},"institutions":[{"id":"https://openalex.org/I143868143","display_name":"Anhui University","ror":"https://ror.org/05th6yx34","country_code":"CN","type":"education","lineage":["https://openalex.org/I143868143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyu Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Anhui University, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Anhui University, Hefei, China","institution_ids":["https://openalex.org/I143868143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110683843","display_name":"Wang Xiang","orcid":null},"institutions":[{"id":"https://openalex.org/I143868143","display_name":"Anhui University","ror":"https://ror.org/05th6yx34","country_code":"CN","type":"education","lineage":["https://openalex.org/I143868143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wang Xiang","raw_affiliation_strings":["School of Computer Science and Technology, Anhui University, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Anhui University, Hefei, China","institution_ids":["https://openalex.org/I143868143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100744502","display_name":"Jianhua Tao","orcid":"https://orcid.org/0000-0002-0477-587X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianhua Tao","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116159413","display_name":"Xinhui Li","orcid":"https://orcid.org/0009-0006-5522-0037"},"institutions":[{"id":"https://openalex.org/I143868143","display_name":"Anhui University","ror":"https://ror.org/05th6yx34","country_code":"CN","type":"education","lineage":["https://openalex.org/I143868143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinhui Li","raw_affiliation_strings":["School of Computer Science and Technology, Anhui University, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Anhui University, Hefei, China","institution_ids":["https://openalex.org/I143868143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078525423","display_name":"Jiangyan Yi","orcid":"https://orcid.org/0000-0003-2422-4618"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangyan Yi","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031774490","display_name":"Dianbo Sui","orcid":"https://orcid.org/0000-0002-5200-2265"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dianbo Sui","raw_affiliation_strings":["Harbin Institute of Technology, Weihai, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Weihai, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026466982","display_name":"Zhao Lv","orcid":"https://orcid.org/0000-0003-4530-4422"},"institutions":[{"id":"https://openalex.org/I143868143","display_name":"Anhui University","ror":"https://ror.org/05th6yx34","country_code":"CN","type":"education","lineage":["https://openalex.org/I143868143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhao Lv","raw_affiliation_strings":["School of Computer Science and Technology, Anhui University, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Anhui University, Hefei, China","institution_ids":["https://openalex.org/I143868143"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5037493212"],"corresponding_institution_ids":["https://openalex.org/I143868143"],"apc_list":null,"apc_paid":null,"fwci":3.4752,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.93585952,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1652","last_page":"1661"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7111333608627319},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.5729531049728394},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4938269555568695},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.488061785697937},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.4850579798221588},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.4167827367782593},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.38222363591194153},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3615102767944336},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.07738140225410461},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.05089870095252991}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7111333608627319},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.5729531049728394},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4938269555568695},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.488061785697937},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.4850579798221588},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4167827367782593},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.38222363591194153},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3615102767944336},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.07738140225410461},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.05089870095252991},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681550","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681550","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W2067295501","https://openalex.org/W2128495200","https://openalex.org/W2150769028","https://openalex.org/W2587150483","https://openalex.org/W2742722178","https://openalex.org/W2756894032","https://openalex.org/W2760103357","https://openalex.org/W2893969570","https://openalex.org/W2895807593","https://openalex.org/W2937727294","https://openalex.org/W2952218014","https://openalex.org/W2962788625","https://openalex.org/W2964058413","https://openalex.org/W2973062255","https://openalex.org/W3008003372","https://openalex.org/W3015199127","https://openalex.org/W3016106826","https://openalex.org/W3026915299","https://openalex.org/W3057611960","https://openalex.org/W3105928222","https://openalex.org/W3161550781","https://openalex.org/W3163652268","https://openalex.org/W3183182307","https://openalex.org/W4224918929","https://openalex.org/W4226208605","https://openalex.org/W4226338831","https://openalex.org/W4295308317","https://openalex.org/W4309259952","https://openalex.org/W4372260086","https://openalex.org/W4385756463","https://openalex.org/W4385822730","https://openalex.org/W4387968167","https://openalex.org/W4387968393","https://openalex.org/W4392902910","https://openalex.org/W4392903251","https://openalex.org/W4392909475"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W66821593","https://openalex.org/W4297807400","https://openalex.org/W4313854686","https://openalex.org/W2249138175","https://openalex.org/W1521299571","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W1516392727","https://openalex.org/W2140022733"],"abstract_inverted_index":{"Speaker":[0],"extraction":[1],"aims":[2],"to":[3,43,88,95,121,154],"selectively":[4],"extract":[5,89,123],"the":[6,10,14,24,32,37,47,51,82,86,90,100,103,114,124,129,136,147,161,164,170,181,187,202],"target":[7,52,91],"speaker":[8,53,78],"from":[9,36,85],"multi-talker":[11],"environment":[12],"under":[13],"guidance":[15],"of":[16,99,127,163],"auxiliary":[17],"reference.":[18],"Recent":[19],"studies":[20],"have":[21],"shown":[22],"that":[23,113,196],"attended":[25],"speaker's":[26],"information":[27,49],"can":[28],"be":[29],"decoded":[30],"by":[31],"auditory":[33,155],"attention":[34,156],"decoding":[35,157],"listener's":[38],"brain":[39],"activity.":[40],"However,":[41],"how":[42],"more":[44],"effectively":[45,122],"utilize":[46],"common":[48],"about":[50],"contained":[54],"in":[55,192,205],"both":[56,180],"electroencephalography":[57],"(EEG)":[58],"and":[59,158,186],"speech":[60,101,105],"is":[61,106],"still":[62],"an":[63],"unresolved":[64],"problem.":[65],"In":[66,93,119],"this":[67,193],"paper,":[68],"we":[69,167],"propose":[70],"a":[71,173],"multi-scale":[72,115,141],"fusion":[73],"network":[74],"(MSFNet)":[75],"for":[76],"brain-controlled":[77],"extraction,":[79],"which":[80],"utilizes":[81],"EEG":[83,137,148],"recorded":[84],"listener":[87],"speech.":[92],"order":[94],"make":[96],"full":[97],"use":[98],"information,":[102],"mixed":[104],"encoded":[107],"with":[108,146],"multiple":[109],"time":[110],"scales":[111],"so":[112],"embeddings":[116,142],"are":[117,133,143],"acquired.":[118],"addition,":[120],"non-Euclidean":[125],"data":[126],"EEG,":[128],"graph":[130],"convolutional":[131],"networks":[132],"used":[134],"as":[135],"encoder.":[138],"Finally,":[139],"these":[140],"separately":[144],"fused":[145],"features.":[149],"To":[150],"facilitate":[151],"research":[152],"related":[153],"further":[159],"validate":[160],"effectiveness":[162],"proposed":[165,189],"method,":[166],"also":[168],"construct":[169],"AVED":[171,190],"dataset,":[172],"new":[174],"EEG-Audio":[175],"dataset.":[176],"Experimental":[177],"results":[178],"on":[179],"public":[182],"Cocktail":[183],"Party":[184],"dataset":[185,191],"newly":[188],"paper":[194],"show":[195],"our":[197],"MSFNet":[198],"model":[199],"significantly":[200],"outperforms":[201],"state-of-the-art":[203],"method":[204],"certain":[206],"objective":[207],"evaluation":[208],"metrics.":[209]},"counts_by_year":[{"year":2025,"cited_by_count":10}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
