{"id":"https://openalex.org/W3160903688","doi":"https://doi.org/10.1109/icassp39728.2021.9413837","title":"Sandglasset: A Light Multi-Granularity Self-Attentive Network for Time-Domain Speech Separation","display_name":"Sandglasset: A Light Multi-Granularity Self-Attentive Network for Time-Domain Speech Separation","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3160903688","doi":"https://doi.org/10.1109/icassp39728.2021.9413837","mag":"3160903688"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413837","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413837","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102313170","display_name":"Max W. Y. Lam","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Max W. Y. Lam","raw_affiliation_strings":["Tencent AI Lab,Shenzhen,China","Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100682902","display_name":"Jun Wang","orcid":"https://orcid.org/0000-0002-3267-4777"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Wang","raw_affiliation_strings":["Tencent AI Lab,Shenzhen,China","Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075183307","display_name":"Dan Su","orcid":"https://orcid.org/0000-0001-5746-9545"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Su","raw_affiliation_strings":["Tencent AI Lab,Shenzhen,China","Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Tencent AI Lab,Bellevue,WA,USA","Tencent AI Lab, Bellevue, WA, USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Bellevue,WA,USA","institution_ids":["https://openalex.org/I4210108985"]},{"raw_affiliation_string":"Tencent AI Lab, Bellevue, WA, USA","institution_ids":["https://openalex.org/I4210108985"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102313170"],"corresponding_institution_ids":["https://openalex.org/I2250653659"],"apc_list":null,"apc_paid":null,"fwci":5.8643,"has_fulltext":false,"cited_by_count":49,"citation_normalized_percentile":{"value":0.97069058,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5759","last_page":"5763"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.9330626726150513},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7452055215835571},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7036534547805786},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6067036986351013},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5510247945785522},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5167633295059204},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5165942907333374},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3768687844276428},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3671417832374573},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34139639139175415},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1379179060459137}],"concepts":[{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.9330626726150513},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7452055215835571},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7036534547805786},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6067036986351013},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5510247945785522},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5167633295059204},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5165942907333374},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3768687844276428},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3671417832374573},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34139639139175415},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1379179060459137},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413837","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413837","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5699999928474426,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1901129140","https://openalex.org/W1991139021","https://openalex.org/W2035576074","https://openalex.org/W2095705004","https://openalex.org/W2143169494","https://openalex.org/W2221409856","https://openalex.org/W2460742184","https://openalex.org/W2734774145","https://openalex.org/W2792764867","https://openalex.org/W2804078698","https://openalex.org/W2896457183","https://openalex.org/W2952218014","https://openalex.org/W2962905190","https://openalex.org/W2962935966","https://openalex.org/W2963341956","https://openalex.org/W2963452667","https://openalex.org/W2964110616","https://openalex.org/W2964121744","https://openalex.org/W2964189376","https://openalex.org/W2972460025","https://openalex.org/W2973071728","https://openalex.org/W2995166068","https://openalex.org/W2996969697","https://openalex.org/W3015199127","https://openalex.org/W3015794161","https://openalex.org/W3032371044","https://openalex.org/W3035268204","https://openalex.org/W3045904949","https://openalex.org/W3096893582","https://openalex.org/W3099330747","https://openalex.org/W3185109982","https://openalex.org/W4297789187","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6639824700","https://openalex.org/W6674330103","https://openalex.org/W6739901393","https://openalex.org/W6744649695","https://openalex.org/W6749825310","https://openalex.org/W6751512325","https://openalex.org/W6752378368","https://openalex.org/W6755207826","https://openalex.org/W6768815455","https://openalex.org/W6771792932","https://openalex.org/W6774687970","https://openalex.org/W6774995033","https://openalex.org/W6779126078"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W2885125400","https://openalex.org/W2931688134","https://openalex.org/W1989889224","https://openalex.org/W4382618745","https://openalex.org/W1973775000","https://openalex.org/W2748922771","https://openalex.org/W4293305277"],"abstract_inverted_index":{"One":[0],"of":[1,22,83,92],"the":[2,20,61,80,84,93,102,115,126,139,152,168],"leading":[3],"single-channel":[4],"speech":[5],"separation":[6],"(SS)":[7],"models":[8],"is":[9,35],"based":[10],"on":[11,142],"a":[12,15,50,54],"TasNet":[13],"with":[14,53,114,133],"dual-path":[16],"segmentation":[17],"technique,":[18],"where":[19,151],"size":[21,70],"each":[23,76],"segment":[24],"remains":[25],"unchanged":[26],"throughout":[27],"all":[28],"layers.":[29],"In":[30],"contrast,":[31],"our":[32,131],"key":[33],"finding":[34],"that":[36,109],"multi-granularity":[37],"features":[38,85,113],"are":[39,118],"essential":[40],"for":[41,120],"enhancing":[42],"contextual":[43],"modeling":[44],"and":[45,71,96,149,162],"computational":[46,72],"efficiency.":[47],"We":[48,106],"introduce":[49],"self-attentive":[51],"network":[52,94],"novel":[55],"sandglass-shape,":[56],"namely":[57],"Sandglasset,":[58,79],"which":[59],"advances":[60],"state-of-the-art":[62],"(SOTA)":[63],"SS":[64,145],"performance":[65],"at":[66],"significantly":[67],"smaller":[68],"model":[69],"cost.":[73],"Forward":[74],"along":[75],"block":[77],"inside":[78],"temporal":[81],"granularity":[82,117],"gradually":[86],"becomes":[87],"coarser":[88],"until":[89],"reaching":[90],"half":[91],"blocks,":[95],"then":[97],"successively":[98],"turns":[99],"finer":[100],"towards":[101],"raw":[103],"signal":[104],"level.":[105],"also":[107],"unfold":[108],"residual":[110],"connections":[111],"between":[112],"same":[116],"critical":[119],"preserving":[121],"information":[122],"after":[123],"passing":[124],"through":[125],"bottleneck":[127],"layer.":[128],"Experiments":[129],"show":[130],"Sandglasset":[132],"only":[134],"2.3M":[135],"parameters":[136],"has":[137],"achieved":[138],"best":[140],"results":[141],"two":[143],"benchmark":[144],"datasets":[146],"\u2013":[147],"WSJ0-2mix":[148],"WSJ0-3mix,":[150],"SI-SNRi":[153],"scores":[154],"have":[155],"been":[156],"improved":[157],"by":[158],"absolute":[159],"0.6":[160],"dB":[161],"2.4":[163],"dB,":[164],"respectively,":[165],"comparing":[166],"to":[167],"prior":[169],"SOTA":[170],"results.":[171]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
