{"id":"https://openalex.org/W4403126475","doi":"https://doi.org/10.1109/iwaenc61483.2024.10694313","title":"TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement","display_name":"TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement","publication_year":2024,"publication_date":"2024-09-09","ids":{"openalex":"https://openalex.org/W4403126475","doi":"https://doi.org/10.1109/iwaenc61483.2024.10694313"},"language":"en","primary_location":{"id":"doi:10.1109/iwaenc61483.2024.10694313","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwaenc61483.2024.10694313","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079146015","display_name":"Kohei Saijo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kohei Saijo","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086940921","display_name":"Gordon Wichern","orcid":"https://orcid.org/0000-0002-8597-6795"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gordon Wichern","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102812631","display_name":"Fran\u00e7ois G. Germain","orcid":"https://orcid.org/0000-0002-8973-5315"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fran\u00e7ois G. Germain","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060530570","display_name":"Zexu Pan","orcid":"https://orcid.org/0000-0002-8106-1176"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zexu Pan","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064097430","display_name":"Jonathan Le Roux","orcid":"https://orcid.org/0000-0002-0158-2837"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan Le Roux","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.1144,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.97222958,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"205","last_page":"209"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9376000165939331,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5270595550537109},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5105377435684204},{"id":"https://openalex.org/keywords/overlap\u2013add-method","display_name":"Overlap\u2013add method","score":0.47884654998779297},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.4633958637714386},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.45845359563827515},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.39932340383529663},{"id":"https://openalex.org/keywords/electronic-engineering","display_name":"Electronic engineering","score":0.37292081117630005},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1780945062637329},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.16777944564819336},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12264445424079895},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.12194868922233582},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.10384294390678406},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.08369451761245728},{"id":"https://openalex.org/keywords/fourier-transform","display_name":"Fourier transform","score":0.07999637722969055}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5270595550537109},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5105377435684204},{"id":"https://openalex.org/C181002996","wikidata":"https://www.wikidata.org/wiki/Q1611641","display_name":"Overlap\u2013add method","level":5,"score":0.47884654998779297},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.4633958637714386},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.45845359563827515},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.39932340383529663},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.37292081117630005},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1780945062637329},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.16777944564819336},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12264445424079895},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.12194868922233582},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.10384294390678406},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.08369451761245728},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.07999637722969055},{"id":"https://openalex.org/C76563020","wikidata":"https://www.wikidata.org/wiki/Q4817582","display_name":"Fractional Fourier transform","level":4,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C203024314","wikidata":"https://www.wikidata.org/wiki/Q1365258","display_name":"Fourier analysis","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iwaenc61483.2024.10694313","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwaenc61483.2024.10694313","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1552314771","https://openalex.org/W2127851351","https://openalex.org/W2141998673","https://openalex.org/W2221409856","https://openalex.org/W2460742184","https://openalex.org/W2952218014","https://openalex.org/W2964058413","https://openalex.org/W2972818416","https://openalex.org/W3015191643","https://openalex.org/W3015199127","https://openalex.org/W3096893582","https://openalex.org/W3097777922","https://openalex.org/W3097906045","https://openalex.org/W3151851237","https://openalex.org/W3163652268","https://openalex.org/W3172942063","https://openalex.org/W3185109982","https://openalex.org/W3198028464","https://openalex.org/W3213661887","https://openalex.org/W4224936432","https://openalex.org/W4225310539","https://openalex.org/W4250482878","https://openalex.org/W4296068774","https://openalex.org/W4385756463","https://openalex.org/W4385822313","https://openalex.org/W4388979610","https://openalex.org/W4391021752","https://openalex.org/W4392903251","https://openalex.org/W4392903841","https://openalex.org/W6757817989","https://openalex.org/W6763608318","https://openalex.org/W6767440493","https://openalex.org/W6772383348","https://openalex.org/W6773820404","https://openalex.org/W6777776875","https://openalex.org/W6784333009","https://openalex.org/W6847363464"],"related_works":["https://openalex.org/W2267589039","https://openalex.org/W2369791303","https://openalex.org/W4372260258","https://openalex.org/W2133280289","https://openalex.org/W2360069155","https://openalex.org/W2759540840","https://openalex.org/W2169963286","https://openalex.org/W4254230825","https://openalex.org/W2919798019","https://openalex.org/W2159795102"],"abstract_inverted_index":{"Time-frequency":[0],"(TF)":[1],"domain":[2],"dual-path":[3,53,117],"models":[4,14],"achieve":[5],"high-fidelity":[6],"speech":[7],"separation.":[8],"While":[9],"some":[10],"previous":[11],"state-of-the-art":[12],"(SoTA)":[13],"rely":[15],"on":[16,47,91,120],"RNNs,":[17],"this":[18,43],"reliance":[19],"means":[20],"they":[21],"lack":[22],"the":[23,32,49,88,106,127],"paralleliz-ability,":[24],"scalability,":[25],"and":[26,101,122],"versatility":[27],"of":[28,35,80],"Transformer":[29],"blocks.":[30],"Given":[31],"wide-ranging":[33],"success":[34],"pure":[36],"Transformer-based":[37,64],"architectures":[38],"in":[39,42,134],"other":[40],"fields,":[41],"work":[44,60],"we":[45],"focus":[46,90],"removing":[48],"RNN":[50],"from":[51],"TF-domain":[52,116],"models,":[54],"while":[55],"maintaining":[56],"SoTA":[57,133],"performance.":[58],"This":[59],"presents":[61],"TF-Locoformer,":[62],"a":[63,112],"model":[65,71,129],"with":[66,76,137],"LOcal-modeling":[67],"by":[68],"COnvolution.":[69],"The":[70],"uses":[72],"feed-forward":[73],"networks":[74],"(FFNs)":[75],"convolution":[77],"layers,":[78,82],"instead":[79],"linear":[81],"to":[83,104],"capture":[84],"local":[85],"information,":[86],"letting":[87],"self-attention":[89,103],"capturing":[92],"global":[93],"patterns.":[94],"We":[95,109],"place":[96],"two":[97],"such":[98],"FFNs":[99],"before":[100],"after":[102],"enhance":[105],"local-modeling":[107],"capability.":[108],"also":[110],"introduce":[111],"novel":[113],"normalization":[114],"for":[115],"models.":[118],"Experiments":[119],"separation":[121],"enhancement":[123],"datasets":[124],"show":[125],"that":[126],"proposed":[128],"meets":[130],"or":[131],"exceeds":[132],"multiple":[135],"benchmarks":[136],"an":[138],"RNN-free":[139],"architecture.":[140]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
