{"id":"https://openalex.org/W7123507395","doi":"https://doi.org/10.1109/mmsp64401.2025.11324338","title":"Data-independent Beamforming for End-to-end Multichannel Multi-speaker ASR","display_name":"Data-independent Beamforming for End-to-end Multichannel Multi-speaker ASR","publication_year":2025,"publication_date":"2025-09-21","ids":{"openalex":"https://openalex.org/W7123507395","doi":"https://doi.org/10.1109/mmsp64401.2025.11324338"},"language":null,"primary_location":{"id":"doi:10.1109/mmsp64401.2025.11324338","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp64401.2025.11324338","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Can Cui","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159484","display_name":"Ian's Friends Foundation","ror":"https://ror.org/05d56bw89","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210159484"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Can Cui","raw_affiliation_strings":["iFLYTEK Co., Ltd.,Research and Development Group,China"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Co., Ltd.,Research and Development Group,China","institution_ids":["https://openalex.org/I4210159484"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Paul Magron","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210121838","display_name":"Laboratoire Lorrain de Recherche en Informatique et ses Applications","ror":"https://ror.org/02vnf0c38","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I277688954","https://openalex.org/I4210107720","https://openalex.org/I4210121838","https://openalex.org/I4210159245","https://openalex.org/I90183372"]},{"id":"https://openalex.org/I90183372","display_name":"Universit\u00e9 de Lorraine","ror":"https://ror.org/04vfs2w97","country_code":"FR","type":"education","lineage":["https://openalex.org/I90183372"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Paul Magron","raw_affiliation_strings":["Multispeech Universit&#x00E9; de Lorraine,CNRS, Inria, LORIA,Nancy,France,F-54000"],"affiliations":[{"raw_affiliation_string":"Multispeech Universit&#x00E9; de Lorraine,CNRS, Inria, LORIA,Nancy,France,F-54000","institution_ids":["https://openalex.org/I4210121838","https://openalex.org/I90183372","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mostafa Sadeghi","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210121838","display_name":"Laboratoire Lorrain de Recherche en Informatique et ses Applications","ror":"https://ror.org/02vnf0c38","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I277688954","https://openalex.org/I4210107720","https://openalex.org/I4210121838","https://openalex.org/I4210159245","https://openalex.org/I90183372"]},{"id":"https://openalex.org/I90183372","display_name":"Universit\u00e9 de Lorraine","ror":"https://ror.org/04vfs2w97","country_code":"FR","type":"education","lineage":["https://openalex.org/I90183372"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mostafa Sadeghi","raw_affiliation_strings":["Multispeech Universit&#x00E9; de Lorraine,CNRS, Inria, LORIA,Nancy,France,F-54000"],"affiliations":[{"raw_affiliation_string":"Multispeech Universit&#x00E9; de Lorraine,CNRS, Inria, LORIA,Nancy,France,F-54000","institution_ids":["https://openalex.org/I4210121838","https://openalex.org/I90183372","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":null,"display_name":"Emmanuel Vincent","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210121838","display_name":"Laboratoire Lorrain de Recherche en Informatique et ses Applications","ror":"https://ror.org/02vnf0c38","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I277688954","https://openalex.org/I4210107720","https://openalex.org/I4210121838","https://openalex.org/I4210159245","https://openalex.org/I90183372"]},{"id":"https://openalex.org/I90183372","display_name":"Universit\u00e9 de Lorraine","ror":"https://ror.org/04vfs2w97","country_code":"FR","type":"education","lineage":["https://openalex.org/I90183372"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Emmanuel Vincent","raw_affiliation_strings":["Multispeech Universit&#x00E9; de Lorraine,CNRS, Inria, LORIA,Nancy,France,F-54000"],"affiliations":[{"raw_affiliation_string":"Multispeech Universit&#x00E9; de Lorraine,CNRS, Inria, LORIA,Nancy,France,F-54000","institution_ids":["https://openalex.org/I4210121838","https://openalex.org/I90183372","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210159484"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.69705847,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"240","last_page":"245"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9480000138282776,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9480000138282776,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.03620000183582306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.0017999999690800905,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.6898000240325928},{"id":"https://openalex.org/keywords/beamforming","display_name":"Beamforming","score":0.6826000213623047},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.6326000094413757},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.46959999203681946},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4106999933719635},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.3889000117778778},{"id":"https://openalex.org/keywords/interference","display_name":"Interference (communication)","score":0.38359999656677246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7281000018119812},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.6898000240325928},{"id":"https://openalex.org/C54197355","wikidata":"https://www.wikidata.org/wiki/Q5782992","display_name":"Beamforming","level":2,"score":0.6826000213623047},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6481999754905701},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.6326000094413757},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.46959999203681946},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4106999933719635},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.3889000117778778},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.38359999656677246},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35580000281333923},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.33230000734329224},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.32989999651908875},{"id":"https://openalex.org/C2778955098","wikidata":"https://www.wikidata.org/wiki/Q594676","display_name":"Capon","level":3,"score":0.31709998846054077},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.28949999809265137},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.2840000092983246},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C29705727","wikidata":"https://www.wikidata.org/wiki/Q294562","display_name":"Polar","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C142433447","wikidata":"https://www.wikidata.org/wiki/Q7806653","display_name":"Time\u2013frequency analysis","level":3,"score":0.265500009059906},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26249998807907104}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mmsp64401.2025.11324338","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp64401.2025.11324338","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2125336414","https://openalex.org/W2918296821","https://openalex.org/W2963250244","https://openalex.org/W3013139777","https://openalex.org/W3094831814","https://openalex.org/W3097643313","https://openalex.org/W3169722760","https://openalex.org/W3207834622","https://openalex.org/W3207841327","https://openalex.org/W3208743843","https://openalex.org/W4221165117","https://openalex.org/W4223507627","https://openalex.org/W4225310539","https://openalex.org/W4319862214","https://openalex.org/W4319862656","https://openalex.org/W4372260044","https://openalex.org/W4375868938","https://openalex.org/W4385245566","https://openalex.org/W4385823063","https://openalex.org/W4388821012","https://openalex.org/W4389315128","https://openalex.org/W4391021761","https://openalex.org/W4392903921","https://openalex.org/W4403640427","https://openalex.org/W7108701786"],"related_works":[],"abstract_inverted_index":{"Automatic":[0],"speech":[1],"recognition":[2,84],"(ASR)":[3],"in":[4],"multichannel,":[5,41],"multi-speaker":[6,42],"scenarios":[7],"remains":[8],"challenging":[9],"due":[10],"to":[11,64,87,123,132,136],"ambient":[12],"noise,":[13],"reverberation":[14],"and":[15,49,125],"overlapping":[16],"speakers.":[17],"In":[18],"this":[19],"paper,":[20],"we":[21],"propose":[22],"a":[23,55,88,137],"beamforming":[24,81],"approach":[25],"that":[26,53,142],"processes":[27],"specific":[28],"angular":[29],"sectors":[30],"based":[31],"on":[32,108],"their":[33],"spherical":[34],"polar":[35],"coordinates":[36],"before":[37],"applying":[38],"an":[39],"end-to-end":[40],"ASR":[43,61,103,139],"system.":[44,104],"This":[45],"method":[46,116],"is":[47],"data-independent":[48],"training-free.":[50],"We":[51,105],"demonstrate":[52],"using":[54,65],"group":[56],"of":[57,69,77,92],"beamformed":[58],"signals":[59,78,94],"improves":[60,126],"performance":[62],"compared":[63,135],"the":[66,75,97,102,109,114],"same":[67],"number":[68,76],"raw":[70],"microphone":[71],"signals.":[72],"Moreover,":[73],"increasing":[74],"used":[79],"for":[80,101],"further":[82],"enhances":[83],"accuracy,":[85],"leading":[86],"more":[89],"efficient":[90],"use":[91],"multichannel":[93,138],"while":[95],"reducing":[96],"overall":[98],"input":[99],"load":[100],"conduct":[106],"experiments":[107],"AMI":[110],"meeting":[111],"corpus,":[112],"where":[113],"proposed":[115],"reduces":[117],"word":[118],"error":[119],"rate":[120],"by":[121,130],"up":[122,131],"11%":[124],"speaker":[127],"counting":[128],"accuracy":[129],"27%":[133],"relative":[134],"baseline":[140],"system":[141],"does":[143],"not":[144],"exploit":[145],"beamforming.":[146]},"counts_by_year":[],"updated_date":"2025-11-06T04:12:42.849631","created_date":"2025-10-10T00:00:00"}
