{"id":"https://openalex.org/W4375869119","doi":"https://doi.org/10.1109/icassp49357.2023.10097107","title":"Predictive Skim: Contrastive Predictive Coding for Low-Latency Online Speech Separation","display_name":"Predictive Skim: Contrastive Predictive Coding for Low-Latency Online Speech Separation","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869119","doi":"https://doi.org/10.1109/icassp49357.2023.10097107"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10097107","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10097107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090224430","display_name":"Chenda Li","orcid":"https://orcid.org/0000-0003-0299-9914"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA","CN"],"is_corresponding":true,"raw_author_name":"Chenda Li","raw_affiliation_strings":["Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab,Department of Computer Science and Engineering,China","Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab, Shanghai Jiao Tong University, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab,Department of Computer Science and Engineering,China","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab, Shanghai Jiao Tong University, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101452320","display_name":"Yifei Wu","orcid":"https://orcid.org/0009-0002-8201-6438"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]},{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CA","CN"],"is_corresponding":false,"raw_author_name":"Yifei Wu","raw_affiliation_strings":["Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab,Department of Computer Science and Engineering,China","Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab, Shanghai Jiao Tong University, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab,Department of Computer Science and Engineering,China","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab, Shanghai Jiao Tong University, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]},{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CA","CN"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab,Department of Computer Science and Engineering,China","Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab, Shanghai Jiao Tong University, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab,Department of Computer Science and Engineering,China","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute X-LANCE Lab, Shanghai Jiao Tong University, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5090224430"],"corresponding_institution_ids":["https://openalex.org/I183067930","https://openalex.org/I4210164862"],"apc_list":null,"apc_paid":null,"fwci":0.9856,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.74532855,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7792688608169556},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.7406637668609619},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5260438919067383},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.46045994758605957},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.42983368039131165},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.42613664269447327},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.41842374205589294},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3238341212272644},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1113760769367218}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7792688608169556},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.7406637668609619},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5260438919067383},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.46045994758605957},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.42983368039131165},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.42613664269447327},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.41842374205589294},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3238341212272644},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1113760769367218},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10097107","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10097107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6600000262260437}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1991139021","https://openalex.org/W2221409856","https://openalex.org/W2302255633","https://openalex.org/W2460742184","https://openalex.org/W2734774145","https://openalex.org/W2842511635","https://openalex.org/W2952218014","https://openalex.org/W2962866211","https://openalex.org/W2962935966","https://openalex.org/W2964058413","https://openalex.org/W3015199127","https://openalex.org/W3016232124","https://openalex.org/W3020336359","https://openalex.org/W3036069164","https://openalex.org/W3096408984","https://openalex.org/W3097906045","https://openalex.org/W3099330747","https://openalex.org/W3110852964","https://openalex.org/W3151851237","https://openalex.org/W3160241658","https://openalex.org/W3162539493","https://openalex.org/W3163652268","https://openalex.org/W3165227983","https://openalex.org/W3193846000","https://openalex.org/W3197832224","https://openalex.org/W4224936432","https://openalex.org/W4224938485","https://openalex.org/W4226016254","https://openalex.org/W4297808394","https://openalex.org/W4311187069","https://openalex.org/W6698183232","https://openalex.org/W6718827390","https://openalex.org/W6779963951"],"related_works":["https://openalex.org/W2363301696","https://openalex.org/W2352223112","https://openalex.org/W1570840316","https://openalex.org/W2139283974","https://openalex.org/W2383072803","https://openalex.org/W4312036005","https://openalex.org/W2156505556","https://openalex.org/W2363056088","https://openalex.org/W2120730869","https://openalex.org/W2541680182"],"abstract_inverted_index":{"In":[0,41,62,132],"online":[1,77,90,128,156],"speech":[2,12,30,81,91,129],"separation,":[3],"there":[4],"is":[5,51,85,100],"a":[6,86,136,178],"trade-off":[7],"between":[8],"inherent":[9],"latency":[10,56,175],"and":[11,38,57,147,162],"separation":[13,31,82,130],"performance.":[14],"When":[15],"processing":[16],"the":[17,35,42,48,54,67,74,94,97,104,109,116,127,144,184],"current":[18],"input":[19],"audio,":[20],"looking":[21],"ahead":[22],"to":[23,73,102,142],"more":[24],"future":[25,49,105],"context":[26,50,138],"usually":[27],"brings":[28],"better":[29],"performance":[32],"but":[33],"increases":[34],"algorithm":[36,55],"latency,":[37,47],"vice":[39],"versa.":[40],"requirements":[43],"of":[44],"extremely":[45],"low":[46],"expensive":[52],"for":[53,89],"may":[58],"not":[59],"be":[60,183],"available.":[61],"this":[63],"work,":[64],"we":[65,134,148],"apply":[66],"contrastive":[68],"predictive":[69,117,157],"coding":[70],"(CPC)":[71],"method":[72,141],"previously":[75],"proposed":[76],"Skipping":[78],"Memory":[79],"(SkiM)":[80],"model,":[83],"which":[84,181],"low-latency":[87],"model":[88,99,119],"separation.":[92],"During":[93],"training":[95],"stage,":[96],"SkiM":[98,118,158],"required":[101],"predict":[103],"memory":[106],"states":[107],"given":[108],"history":[110],"memory.":[111],"By":[112],"using":[113],"CPC":[114,161],"training,":[115],"shows":[120],"stronger":[121],"causal":[122,188],"sequence":[123],"modeling":[124],"capacity":[125],"in":[126],"task.":[131],"addition,":[133],"explore":[135],"local":[137],"codec":[139],"(LCC)":[140],"reduce":[143],"computational":[145],"cost,":[146],"make":[149],"qualitative":[150],"analyses":[151],"on":[152,169,177],"it.":[153],"Our":[154],"best":[155],"equipped":[159],"with":[160,172],"LCC":[163],"gets":[164],"15.5":[165],"dB":[166],"SI-SNR":[167],"improvement":[168],"WSJ02-mix":[170],"benchmark":[171],"3-ms":[173],"actual":[174],"tested":[176],"single-core":[179],"CPU,":[180],"should":[182],"state-of-the-art":[185],"results":[186],"among":[187],"models.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
