{"id":"https://openalex.org/W3143147094","doi":"https://doi.org/10.21437/interspeech.2021-149","title":"Three-Class Overlapped Speech Detection Using a Convolutional Recurrent Neural Network","display_name":"Three-Class Overlapped Speech Detection Using a Convolutional Recurrent Neural Network","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3143147094","doi":"https://doi.org/10.21437/interspeech.2021-149","mag":"3143147094"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-149","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-149","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.02878","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091834525","display_name":"Jee-weon Jung","orcid":"https://orcid.org/0000-0003-0505-2988"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jee-weon Jung","raw_affiliation_strings":["NAVER CORPORATION"],"affiliations":[{"raw_affiliation_string":"NAVER CORPORATION","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070613375","display_name":"Hee-Soo Heo","orcid":"https://orcid.org/0000-0003-1567-123X"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hee-Soo Heo","raw_affiliation_strings":["Naver (South Korea), Seongnam-si, South Korea"],"affiliations":[{"raw_affiliation_string":"Naver (South Korea), Seongnam-si, South Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067505925","display_name":"Youngki Kwon","orcid":null},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Youngki Kwon","raw_affiliation_strings":["Naver (South Korea), Seongnam-si, South Korea"],"affiliations":[{"raw_affiliation_string":"Naver (South Korea), Seongnam-si, South Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038723822","display_name":"Joon Son Chung","orcid":"https://orcid.org/0000-0001-7741-7275"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Joon Son Chung","raw_affiliation_strings":["Naver (South Korea), Seongnam-si, South Korea"],"affiliations":[{"raw_affiliation_string":"Naver (South Korea), Seongnam-si, South Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086369397","display_name":"Bong\u2010Jin Lee","orcid":"https://orcid.org/0000-0001-7896-2961"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Bong-Jin Lee","raw_affiliation_strings":["Naver (South Korea), Seongnam-si, South Korea"],"affiliations":[{"raw_affiliation_string":"Naver (South Korea), Seongnam-si, South Korea","institution_ids":["https://openalex.org/I60922564"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5091834525"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04050239,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3086","last_page":"3090"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7891271114349365},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6505511403083801},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6421986818313599},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.576533317565918},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.571425199508667},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5550129413604736},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.5069001317024231},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.47022247314453125},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46735432744026184},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.4590193033218384},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.43918776512145996},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4344097375869751},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.4231453537940979},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.4120941162109375},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4025602340698242},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.35555481910705566}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7891271114349365},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6505511403083801},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6421986818313599},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.576533317565918},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.571425199508667},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5550129413604736},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.5069001317024231},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.47022247314453125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46735432744026184},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.4590193033218384},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.43918776512145996},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4344097375869751},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.4231453537940979},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.4120941162109375},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4025602340698242},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.35555481910705566},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2021-149","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-149","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.02878","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.02878","pdf_url":"https://arxiv.org/pdf/2104.02878","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3143147094","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2104.02878v1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2104.02878","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.02878","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.02878","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.02878","pdf_url":"https://arxiv.org/pdf/2104.02878","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.5600000023841858,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3143147094.pdf","grobid_xml":"https://content.openalex.org/works/W3143147094.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W43416829","https://openalex.org/W48568691","https://openalex.org/W184144068","https://openalex.org/W191366985","https://openalex.org/W1524333225","https://openalex.org/W2009876684","https://openalex.org/W2020150184","https://openalex.org/W2064675550","https://openalex.org/W2083751884","https://openalex.org/W2098441927","https://openalex.org/W2114970513","https://openalex.org/W2132914434","https://openalex.org/W2138730338","https://openalex.org/W2169264834","https://openalex.org/W2219249508","https://openalex.org/W2402428045","https://openalex.org/W2696967604","https://openalex.org/W2745338770","https://openalex.org/W2889381673","https://openalex.org/W2913340405","https://openalex.org/W2942551338","https://openalex.org/W2963420686","https://openalex.org/W2972449503","https://openalex.org/W3015528371","https://openalex.org/W3015780472","https://openalex.org/W3035193494","https://openalex.org/W3038871978","https://openalex.org/W3045742579","https://openalex.org/W3090254849","https://openalex.org/W3135990229","https://openalex.org/W3196595845","https://openalex.org/W3208240190"],"related_works":["https://openalex.org/W3198827946","https://openalex.org/W3099832538","https://openalex.org/W2801798988","https://openalex.org/W2931781875","https://openalex.org/W2963574857","https://openalex.org/W2566781703","https://openalex.org/W2504314600","https://openalex.org/W2889956729","https://openalex.org/W1561770256","https://openalex.org/W2736447239","https://openalex.org/W2972511707","https://openalex.org/W2950523597","https://openalex.org/W3141139761","https://openalex.org/W2888451110","https://openalex.org/W2486205537","https://openalex.org/W2538192464","https://openalex.org/W2980874206","https://openalex.org/W2118952425","https://openalex.org/W1532059006","https://openalex.org/W2734774145"],"abstract_inverted_index":{"In":[0,137],"this":[1],"work,":[2],"we":[3,139],"propose":[4],"an":[5],"overlapped":[6,30,44,103,149],"speech":[7,104,150],"detection":[8,105,151],"system":[9],"trained":[10],"as":[11,22],"a":[12,27,48,60,71,108,112,117,128,142],"three-class":[13],"classifier.":[14],"Unlike":[15],"conventional":[16],"systems":[17],"that":[18],"perform":[19],"binary":[20],"classification":[21],"to":[23,82,89,97,145],"whether":[24],"or":[25],"not":[26],"frame":[28],"contains":[29],"speech,":[31,42],"the":[32,51,56,65,122,147,161,165],"proposed":[33,102,148],"approach":[34,144],"classifies":[35],"into":[36],"three":[37],"classes:":[38],"non-speech,":[39],"single":[40],"speaker":[41,154],"and":[43,93,116],"speech.":[45],"By":[46],"training":[47],"network":[49,78],"with":[50,111,134],"more":[52],"detailed":[53],"label":[54],"definition,":[55],"model":[57,90,98,106,152],"can":[58],"learn":[59],"better":[61],"notion":[62],"on":[63,121],"deciding":[64],"number":[66],"of":[67,114,119,164],"speakers":[68],"included":[69],"in":[70,131,160],"given":[72],"frame.":[73],"A":[74],"convolutional":[75,86],"recurrent":[76,94],"neural":[77],"architecture":[79],"is":[80],"explored":[81],"benefit":[83],"from":[84],"both":[85],"layer's":[87,95],"capability":[88],"local":[91],"patterns":[92],"ability":[96],"sequential":[99],"information.":[100],"The":[101],"establishes":[107],"state-of-the-art":[109],"performance":[110],"precision":[113],"0.6648":[115],"recall":[118,132],"0.3222":[120],"DIHARD":[123,166],"II":[124],"evaluation":[125],"set,":[126],"showing":[127],"20%":[129],"increase":[130],"along":[133],"higher":[135],"precision.":[136],"addition,":[138],"also":[140],"introduce":[141],"simple":[143],"utilize":[146],"for":[153],"diarization":[155],"which":[156],"ranked":[157],"third":[158],"place":[159],"Track":[162],"1":[163],"III":[167],"challenge.":[168]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
