{"id":"https://openalex.org/W7148376623","doi":"https://doi.org/10.1109/asru65441.2025.11434606","title":"MBENet: Bone-conduction and Air-conduction Fusion Network for Target Speaker Extraction","display_name":"MBENet: Bone-conduction and Air-conduction Fusion Network for Target Speaker Extraction","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148376623","doi":"https://doi.org/10.1109/asru65441.2025.11434606"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100374052","display_name":"Chen Zhang","orcid":"https://orcid.org/0000-0001-5773-9090"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chen Zhang","raw_affiliation_strings":["Northwestern Polytechnical University,School of Marine Science and Technology,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,School of Marine Science and Technology,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032679393","display_name":"Linfeng Feng","orcid":"https://orcid.org/0009-0008-7908-779X"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linfeng Feng","raw_affiliation_strings":["Northwestern Polytechnical University,School of Marine Science and Technology,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,School of Marine Science and Technology,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhi Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Liu","raw_affiliation_strings":["Shenzhen Huangli Technologies Company Ltd,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Huangli Technologies Company Ltd,Shenzhen,China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100430876","display_name":"Xi Zhang","orcid":"https://orcid.org/0000-0003-3415-5345"},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]},{"id":"https://openalex.org/I4387153335","display_name":"China Telecom","ror":"https://ror.org/05p67dv18","country_code":null,"type":"company","lineage":["https://openalex.org/I4387153335"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao-Lei Zhang","raw_affiliation_strings":["Institute of Artificial Intelligence (TeleAI),China Telecom,China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence (TeleAI),China Telecom,China","institution_ids":["https://openalex.org/I4210136246","https://openalex.org/I4387153335"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006533393","display_name":"Xiao Li","orcid":"https://orcid.org/0000-0002-3577-7915"},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]},{"id":"https://openalex.org/I4387153335","display_name":"China Telecom","ror":"https://ror.org/05p67dv18","country_code":null,"type":"company","lineage":["https://openalex.org/I4387153335"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuelong Li","raw_affiliation_strings":["Institute of Artificial Intelligence (TeleAI),China Telecom,China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence (TeleAI),China Telecom,China","institution_ids":["https://openalex.org/I4210136246","https://openalex.org/I4387153335"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100374052"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.86689363,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6075999736785889,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6075999736785889,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.05620000138878822,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.04320000112056732,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.3659999966621399},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.35670000314712524},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34850001335144043},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.3314000070095062},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.3296999931335449},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.31310001015663147}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6313999891281128},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5853000283241272},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4223000109195709},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34850001335144043},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.3296999931335449},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.31310001015663147},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.30079999566078186},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.296099990606308},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.2696000039577484},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C2986627078","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker identification","level":3,"score":0.2531000077724457}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W2245745904","https://openalex.org/W2809941692","https://openalex.org/W2889442120","https://openalex.org/W2962866211","https://openalex.org/W3011424113","https://openalex.org/W3015623828","https://openalex.org/W3042800855","https://openalex.org/W3097653961","https://openalex.org/W3161480375","https://openalex.org/W3162534564","https://openalex.org/W4224931274","https://openalex.org/W4304979274","https://openalex.org/W4311897894","https://openalex.org/W4367597591","https://openalex.org/W4372260086","https://openalex.org/W4372271325","https://openalex.org/W4375928773","https://openalex.org/W4385822305","https://openalex.org/W4389164214","https://openalex.org/W4391021760","https://openalex.org/W4392903177","https://openalex.org/W4392903977","https://openalex.org/W4396833001","https://openalex.org/W4400105722","https://openalex.org/W4401416441","https://openalex.org/W4406462045"],"related_works":[],"abstract_inverted_index":{"Target":[0],"speaker":[1],"extraction":[2],"(TSE)":[3],"aims":[4],"to":[5,43],"isolate":[6],"a":[7],"target":[8,50],"speaker\u2019s":[9,51],"voice":[10],"from":[11],"mixed":[12],"speech":[13,39,63],"using":[14],"additional":[15],"cues.":[16],"Most":[17],"TSE":[18,68,93],"models":[19],"rely":[20],"on":[21],"air-conduction":[22],"(AC)":[23],"signals,":[24,76],"which":[25],"are":[26],"easily":[27],"affected":[28],"by":[29,103],"interfering":[30],"speakers":[31],"and":[32,46],"background":[33],"noise.":[34],"In":[35],"contrast,":[36],"bone-conduction":[37],"(BC)":[38],"is":[40],"naturally":[41],"resistant":[42],"ambient":[44],"noise":[45],"captures":[47],"only":[48,58],"the":[49,72,79,85,92,105,109],"voice.":[52],"Existing":[53],"BC-AC":[54,87],"fusion":[55,88],"methods":[56],"have":[57],"been":[59],"applied":[60],"in":[61,67,126],"conventional":[62],"enhancement,":[64],"lacking":[65],"exploration":[66],"applications.":[69],"To":[70,95],"leverage":[71],"advantages":[73],"of":[74,108],"BC":[75,110],"we":[77,99],"propose":[78],"Multi-modal":[80],"Bone-conduction":[81],"Enhancement":[82],"Network":[83],"(MBENet),":[84],"first":[86],"model":[89,122],"designed":[90],"for":[91],"task.":[94,115],"further":[96],"enhance":[97],"performance,":[98],"introduce":[100],"multi-task":[101],"learning":[102],"incorporating":[104],"bandwidth":[106],"extension":[107],"channel":[111],"as":[112],"an":[113],"auxiliary":[114],"Experimental":[116],"results":[117],"show":[118],"that":[119],"our":[120],"casual":[121],"outperforms":[123],"existing":[124],"approaches":[125],"challenging":[127],"environments.":[128]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-04-03T00:00:00"}
