{"id":"https://openalex.org/W3205115292","doi":"https://doi.org/10.21437/interspeech.2022-11386","title":"TRUNet: Transformer-Recurrent-U Network for Multi-channel Reverberant Sound Source Separation","display_name":"TRUNet: Transformer-Recurrent-U Network for Multi-channel Reverberant Sound Source Separation","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W3205115292","doi":"https://doi.org/10.21437/interspeech.2022-11386","mag":"3205115292"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-11386","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11386","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058219648","display_name":"Ali Aroudi","orcid":"https://orcid.org/0000-0001-5770-0858"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ali Aroudi","raw_affiliation_strings":["Sony Europe B.V., Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Sony Europe B.V., Stuttgart, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010554448","display_name":"Stefan Uhlich","orcid":"https://orcid.org/0000-0003-3158-4945"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stefan Uhlich","raw_affiliation_strings":["Sony Europe B.V., Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Sony Europe B.V., Stuttgart, Germany","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077832121","display_name":"Marc Ferr\u00e0s Font","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marc Ferras Font","raw_affiliation_strings":["Sony Europe B.V., Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Sony Europe B.V., Stuttgart, Germany","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5058219648"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6137,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.63096367,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"911","last_page":"915"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.6025988459587097},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5427775979042053},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.5284780263900757},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5014562606811523},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.4710046350955963},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3097417950630188},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.2860693037509918},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1994282603263855},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.16521593928337097},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.12406215071678162}],"concepts":[{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.6025988459587097},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5427775979042053},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.5284780263900757},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5014562606811523},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.4710046350955963},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3097417950630188},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.2860693037509918},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1994282603263855},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.16521593928337097},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.12406215071678162},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-11386","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11386","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1506438021","https://openalex.org/W1522301498","https://openalex.org/W2003699215","https://openalex.org/W2117678320","https://openalex.org/W2242562301","https://openalex.org/W2311973309","https://openalex.org/W2568308529","https://openalex.org/W2734774145","https://openalex.org/W2911371112","https://openalex.org/W2937525188","https://openalex.org/W2952218014","https://openalex.org/W2963189033","https://openalex.org/W2963403868","https://openalex.org/W2963902628","https://openalex.org/W2964121744","https://openalex.org/W2972541922","https://openalex.org/W2998572311","https://openalex.org/W3015191643","https://openalex.org/W3015199127","https://openalex.org/W3015788098","https://openalex.org/W3123318516","https://openalex.org/W3160129476","https://openalex.org/W3160140981","https://openalex.org/W3161950572","https://openalex.org/W3163391314","https://openalex.org/W3163652268","https://openalex.org/W3171278394","https://openalex.org/W3196360884","https://openalex.org/W3216920799","https://openalex.org/W4246924397","https://openalex.org/W4285719527","https://openalex.org/W4287394172","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4252379193","https://openalex.org/W2936423518","https://openalex.org/W1995163783","https://openalex.org/W3016066622","https://openalex.org/W1993694758","https://openalex.org/W2902520638","https://openalex.org/W3160408143","https://openalex.org/W2566408874","https://openalex.org/W2036863524","https://openalex.org/W3015225820"],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"many":[3],"deep":[4],"learning":[5],"techniques":[6],"for":[7,44],"single-channel":[8,110,178],"sound":[9,45,139,168],"source":[10,46,51,169,181],"separation":[11,140,170,182],"have":[12],"been":[13],"proposed":[14,128,174],"using":[15,41,116,126],"recurrent,":[16],"convolutional":[17],"and":[18,29,90,99,148,179],"transformer":[19],"networks.When":[20],"multiple":[21],"microphones":[22],"are":[23],"available,":[24],"spatial":[25,74,88],"diversity":[26,36],"between":[27],"speakers":[28],"background":[30],"noise":[31],"in":[32,53],"addition":[33,102],"to":[34,103],"spectro-temporal":[35],"can":[37],"be":[38],"exploited":[39],"by":[40],"multi-channel":[42,50,65,68,104,113,180],"filters":[43,66,111],"separation.Aiming":[47],"at":[48,85,96],"end-to-end":[49],"separation,":[52],"this":[54],"paper":[55],"we":[56,106],"propose":[57],"a":[58,73,91,122,127,146],"transformerrecurrent-U":[59],"network":[60,76,94,120,144],"(TRUNet),":[61],"which":[62,135],"directly":[63],"estimates":[64],"from":[67,112,153],"input":[69,114],"spectra.TRUNet":[70],"consists":[71],"of":[72,158],"processing":[75,93],"with":[77],"an":[78,159],"attention":[79],"mechanism":[80],"across":[81],"microphone":[82,161],"channels":[83],"aiming":[84,95],"capturing":[86,97],"the":[87,119,138,143,173],"diversity,":[89],"spectrotemporal":[92],"spectral":[98],"temporal":[100],"diversities.In":[101],"filters,":[105],"also":[107],"consider":[108],"estimating":[109],"spectra":[115],"TRUNet.We":[117],"train":[118],"on":[121,145,165],"large":[123],"reverberant":[124,150,167],"dataset":[125],"combined":[129],"compressed":[130],"mean-squared":[131],"error":[132],"loss":[133],"function,":[134],"further":[136],"improves":[137],"performance.We":[141],"evaluate":[142],"realistic":[147,166],"challenging":[149],"dataset,":[151],"generated":[152],"measured":[154],"room":[155],"impulse":[156],"responses":[157],"actual":[160],"array.The":[162],"experimental":[163],"results":[164],"show":[171],"that":[172],"TRUNet":[175],"outperforms":[176],"state-of-the-art":[177],"methods.":[183]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
