{"id":"https://openalex.org/W3002785901","doi":"https://doi.org/10.1109/icassp40776.2020.9053108","title":"Frame-Based Overlapping Speech Detection Using Convolutional Neural Networks","display_name":"Frame-Based Overlapping Speech Detection Using Convolutional Neural Networks","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3002785901","doi":"https://doi.org/10.1109/icassp40776.2020.9053108","mag":"3002785901"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053108","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053108","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2001.09937","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112853229","display_name":"Midia Yousefi","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Midia Yousefi","raw_affiliation_strings":["Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering, University of Texas at Dallas, Richardson, Texas, U.S.A","University of Texas at Dallas,Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering,Richardson,Texas,U.S.A"],"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering, University of Texas at Dallas, Richardson, Texas, U.S.A","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"University of Texas at Dallas,Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering,Richardson,Texas,U.S.A","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057910370","display_name":"John H. L. Hansen","orcid":"https://orcid.org/0000-0003-1382-9929"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John H.L. Hansen","raw_affiliation_strings":["Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering, University of Texas at Dallas, Richardson, Texas, U.S.A","University of Texas at Dallas,Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering,Richardson,Texas,U.S.A"],"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering, University of Texas at Dallas, Richardson, Texas, U.S.A","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"University of Texas at Dallas,Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering,Richardson,Texas,U.S.A","institution_ids":["https://openalex.org/I162577319"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5112853229"],"corresponding_institution_ids":["https://openalex.org/I162577319"],"apc_list":null,"apc_paid":null,"fwci":0.1523,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.39355852,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"401","issue":null,"first_page":"6744","last_page":"6748"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8198627829551697},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7153528928756714},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6214747428894043},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.5921645760536194},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5587234497070312},{"id":"https://openalex.org/keywords/tracing","display_name":"Tracing","score":0.4279812276363373},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42482230067253113},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.40934300422668457},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32329225540161133}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8198627829551697},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7153528928756714},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6214747428894043},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.5921645760536194},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5587234497070312},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.4279812276363373},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42482230067253113},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.40934300422668457},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32329225540161133},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053108","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053108","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2001.09937","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.09937","pdf_url":"https://arxiv.org/pdf/2001.09937","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3002785901","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2001.09937","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2001.09937","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2001.09937","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.17023/ajya-6131","is_oa":true,"landing_page_url":"https://doi.org/10.17023/ajya-6131","pdf_url":null,"source":{"id":"https://openalex.org/S7407051697","display_name":"IEEE RESOURCE CENTERS","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2001.09937","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.09937","pdf_url":"https://arxiv.org/pdf/2001.09937","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5099999904632568,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3002785901.pdf","grobid_xml":"https://content.openalex.org/works/W3002785901.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W79962888","https://openalex.org/W1902027874","https://openalex.org/W1989954041","https://openalex.org/W2015143272","https://openalex.org/W2051758472","https://openalex.org/W2098441927","https://openalex.org/W2125336414","https://openalex.org/W2169264834","https://openalex.org/W2193413348","https://openalex.org/W2398324643","https://openalex.org/W2402428045","https://openalex.org/W2591682631","https://openalex.org/W2601064856","https://openalex.org/W2745338770","https://openalex.org/W2889145072","https://openalex.org/W2889785444","https://openalex.org/W2952752702","https://openalex.org/W2966215472","https://openalex.org/W2972864514","https://openalex.org/W2978556807","https://openalex.org/W3007965881","https://openalex.org/W3008876072","https://openalex.org/W6678809451","https://openalex.org/W6687566353","https://openalex.org/W6712861328"],"related_works":["https://openalex.org/W3015528371","https://openalex.org/W3094892544","https://openalex.org/W1496995542","https://openalex.org/W1968220781","https://openalex.org/W1964143424","https://openalex.org/W2009810445","https://openalex.org/W2945616452","https://openalex.org/W2168249605","https://openalex.org/W1837709900","https://openalex.org/W3016232124","https://openalex.org/W2406310406","https://openalex.org/W1807865112","https://openalex.org/W2081433959","https://openalex.org/W3210740416","https://openalex.org/W2149220570","https://openalex.org/W2533459529","https://openalex.org/W2234788913","https://openalex.org/W2753802668","https://openalex.org/W2006780816","https://openalex.org/W3091147711"],"abstract_inverted_index":{"Naturalistic":[0],"speech":[1,5,17,38,68,76,91],"recordings":[2],"usually":[3],"contain":[4],"signals":[6],"from":[7],"multiple":[8],"speakers.":[9,28],"This":[10],"phenomenon":[11],"can":[12,73],"degrade":[13],"the":[14,21,34,52,95],"performance":[15,54],"of":[16,23,36,80,84,89],"technologies":[18],"due":[19],"to":[20],"complexity":[22],"tracing":[24],"and":[25,59,82],"recognizing":[26],"individual":[27],"In":[29],"this":[30],"study,":[31],"we":[32],"investigate":[33],"detection":[35,53],"overlapping":[37,75],"on":[39,86,94],"segments":[40],"as":[41,43],"short":[42],"25":[44],"ms":[45],"using":[46,55],"Convolutional":[47],"Neural":[48],"Networks.":[49],"We":[50],"evaluate":[51],"different":[56],"spectral":[57],"features,":[58],"show":[60],"that":[61],"pyknogram":[62],"features":[63],"outperforms":[64],"other":[65],"commonly":[66],"used":[67],"features.":[69],"The":[70],"proposed":[71],"system":[72],"predict":[74],"with":[77],"an":[78],"accuracy":[79],"84%":[81],"Fs-core":[83],"88%":[85],"a":[87],"dataset":[88],"mixed":[90],"generated":[92],"based":[93],"GRID":[96],"dataset.":[97]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
