{"id":"https://openalex.org/W2935889207","doi":"https://doi.org/10.1109/icassp.2019.8683243","title":"A Two-stage Single-channel Speaker-dependent Speech Separation Approach for Chime-5 Challenge","display_name":"A Two-stage Single-channel Speaker-dependent Speech Separation Approach for Chime-5 Challenge","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2935889207","doi":"https://doi.org/10.1109/icassp.2019.8683243","mag":"2935889207"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8683243","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8683243","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101926390","display_name":"Lei Sun","orcid":"https://orcid.org/0000-0001-7680-6455"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei Sun","raw_affiliation_strings":["University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066595711","display_name":"Jun Du","orcid":"https://orcid.org/0000-0002-2387-0389"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Du","raw_affiliation_strings":["University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101590046","display_name":"Tian Gao","orcid":"https://orcid.org/0000-0002-9523-5560"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian Gao","raw_affiliation_strings":["iFlytek Research, Hefei, Anhui, P. R. China"],"affiliations":[{"raw_affiliation_string":"iFlytek Research, Hefei, Anhui, P. R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067418255","display_name":"Yi Fang","orcid":"https://orcid.org/0000-0002-1538-249X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi Fang","raw_affiliation_strings":["iFlytek Research, Hefei, Anhui, P. R. China"],"affiliations":[{"raw_affiliation_string":"iFlytek Research, Hefei, Anhui, P. R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089891264","display_name":"Feng Ma","orcid":"https://orcid.org/0000-0002-6539-0965"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng Ma","raw_affiliation_strings":["iFlytek Research, Hefei, Anhui, P. R. China"],"affiliations":[{"raw_affiliation_string":"iFlytek Research, Hefei, Anhui, P. R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101924451","display_name":"Jia Pan","orcid":"https://orcid.org/0000-0002-7073-1744"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia Pan","raw_affiliation_strings":["iFlytek Research, Hefei, Anhui, P. R. China"],"affiliations":[{"raw_affiliation_string":"iFlytek Research, Hefei, Anhui, P. R. China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066868860","display_name":"Chin\u2010Hui Lee","orcid":"https://orcid.org/0000-0002-1892-2551"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chin-Hui Lee","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, Georgia, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101926390"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.4976,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.61415245,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"6650","last_page":"6654"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7848027944564819},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7669537663459778},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.7643700242042542},{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.581052839756012},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.543201208114624},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5177751779556274},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.5081973075866699},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.49372348189353943},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.48209789395332336},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.45749884843826294},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4216604232788086},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.41752058267593384},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3720721900463104},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32250910997390747},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.31272000074386597},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.2602512240409851},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07713353633880615}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7848027944564819},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7669537663459778},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.7643700242042542},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.581052839756012},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.543201208114624},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5177751779556274},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.5081973075866699},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.49372348189353943},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.48209789395332336},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.45749884843826294},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4216604232788086},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.41752058267593384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3720721900463104},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32250910997390747},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.31272000074386597},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.2602512240409851},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07713353633880615},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2019.8683243","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8683243","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W160800111","https://openalex.org/W1524333225","https://openalex.org/W1897240248","https://openalex.org/W1983812858","https://openalex.org/W1991139021","https://openalex.org/W2046869671","https://openalex.org/W2072548008","https://openalex.org/W2078528584","https://openalex.org/W2087368178","https://openalex.org/W2091432990","https://openalex.org/W2101045344","https://openalex.org/W2119599673","https://openalex.org/W2148613904","https://openalex.org/W2168379380","https://openalex.org/W2172265180","https://openalex.org/W2221409856","https://openalex.org/W2460742184","https://openalex.org/W2513383847","https://openalex.org/W2514741789","https://openalex.org/W2558649592","https://openalex.org/W2605589342","https://openalex.org/W2610674366","https://openalex.org/W2749337587","https://openalex.org/W2765976216","https://openalex.org/W2884797218","https://openalex.org/W2944332341","https://openalex.org/W2962866211","https://openalex.org/W3131736947","https://openalex.org/W6631362777"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2149220986","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2023466863"],"abstract_inverted_index":{"In":[0],"this":[1,181],"paper,":[2],"we":[3,38,63,167],"design":[4],"a":[5,65,71,86,92,153,163],"two-stage":[6],"single-channel":[7,157],"speaker-dependent":[8,67,88,158],"speech":[9,24,68,83,89,102,159],"separation":[10,69,90,160],"approach":[11,120],"for":[12,57,74,161],"the":[13,17,43,100,115,141,148,176,188,196],"CHiME-5":[14,44,197],"Challenge,":[15],"targeting":[16],"problem":[18],"of":[19,42,49,129,143,156,171,191],"far-field":[20],"and":[21,34,46,53,80],"multi-talker":[22],"conversational":[23],"recognition":[25],"in":[26,136,195],"dinner":[27],"party":[28],"scenarios":[29],"involving":[30],"background":[31],"noises,":[32],"reverberations":[33],"overlapping":[35],"speech.":[36],"First,":[37],"make":[39,168],"detailed":[40],"analysis":[41],"data":[45,56],"observe":[47],"problems":[48],"inaccurate":[50],"human":[51],"annotations":[52],"low-resource":[54],"useable":[55],"target":[58,73,82,95],"speakers.":[59],"Motivated":[60],"by":[61],"this,":[62],"conduct":[64],"first-stage":[66],"with":[70,91,114],"learning":[72,94],"aggressive":[75],"segregation":[76],"to":[77,98,109,134,151],"generate":[78],"more":[79],"purer":[81],"data.":[84],"Then":[85],"second-stage":[87],"new":[93],"is":[96,147],"performed":[97],"obtain":[99],"final":[101],"masks,":[103],"which":[104],"can":[105,121],"be":[106],"directly":[107],"fed":[108],"back-end":[110],"acoustic":[111],"model.":[112],"Compared":[113],"official":[116],"baseline,":[117],"our":[118,144,184],"proposed":[119],"yield":[122],"an":[123,169],"absolute":[124],"word":[125],"error":[126],"rate":[127],"reduction":[128],"5.3%,":[130],"namely":[131],"from":[132],"81.3%":[133],"76.0%":[135],"development":[137],"test":[138],"set.":[139],"To":[140],"best":[142],"knowledge,":[145],"it":[146],"first":[149,189],"time":[150],"discuss":[152],"feasible":[154],"method":[155],"such":[162],"challenging":[164],"task":[165],"although":[166],"assumption":[170],"oracle":[172],"speaker":[173],"diarization":[174],"following":[175],"challenge":[177],"rules.":[178],"By":[179],"integrating":[180],"crucial":[182],"technique,":[183],"submitted":[185],"systems":[186],"achieved":[187],"place":[190],"all":[192],"four":[193],"tasks":[194],"challenge.":[198]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
