{"id":"https://openalex.org/W2745616976","doi":"https://doi.org/10.21437/interspeech.2017-746","title":"Frame-Wise Dynamic Threshold Based Polyphonic Acoustic Event Detection","display_name":"Frame-Wise Dynamic Threshold Based Polyphonic Acoustic Event Detection","publication_year":2017,"publication_date":"2017-08-16","ids":{"openalex":"https://openalex.org/W2745616976","doi":"https://doi.org/10.21437/interspeech.2017-746","mag":"2745616976"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2017-746","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-746","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091584895","display_name":"Xianjun Xia","orcid":"https://orcid.org/0000-0001-5277-6634"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xianjun Xia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017213156","display_name":"Roberto Togneri","orcid":"https://orcid.org/0000-0002-3778-4633"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roberto Togneri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002219416","display_name":"Ferdous Sohel","orcid":"https://orcid.org/0000-0003-1557-4907"},"institutions":[{"id":"https://openalex.org/I176790772","display_name":"Murdoch University","ror":"https://ror.org/00r4sry34","country_code":"AU","type":"education","lineage":["https://openalex.org/I176790772"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ferdous Sohel","raw_affiliation_strings":["Murdoch university"],"affiliations":[{"raw_affiliation_string":"Murdoch university","institution_ids":["https://openalex.org/I176790772"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088212422","display_name":"Defeng Huang","orcid":"https://orcid.org/0000-0002-1431-8859"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Huang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5091584895"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.0506,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.88273749,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"474","last_page":"478"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/polyphony","display_name":"Polyphony","score":0.7919062376022339},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7905453443527222},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.7316645383834839},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6712774038314819},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5849450826644897},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.582523763179779},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.48320305347442627},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4368906617164612},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42986056208610535},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.2037038505077362},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.06631597876548767}],"concepts":[{"id":"https://openalex.org/C128979739","wikidata":"https://www.wikidata.org/wiki/Q179465","display_name":"Polyphony","level":2,"score":0.7919062376022339},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7905453443527222},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.7316645383834839},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6712774038314819},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5849450826644897},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.582523763179779},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.48320305347442627},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4368906617164612},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42986056208610535},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.2037038505077362},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.06631597876548767},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2017-746","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-746","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/9f7477e3-8ded-46a2-b0e9-610c7543adb1","is_oa":false,"landing_page_url":"https://research-repository.uwa.edu.au/en/publications/9f7477e3-8ded-46a2-b0e9-610c7543adb1","pdf_url":null,"source":{"id":"https://openalex.org/S4306402523","display_name":"UWA Profiles and Research Repository (University of Western Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177877127","host_organization_name":"The University of Western Australia","host_organization_lineage":["https://openalex.org/I177877127"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Xia , X , Togneri , R , Sohel , F &amp; Huang , D 2017 , Frame-wise dynamic threshold based polyphonic acoustic event detection . in F Lacerda , S Strombergsson , M Wlodarcza , M Heldner , J Gustafson &amp; D House (eds) , 2017 Proceedings of Interspeech . vol. 2017-August , Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH , International Speech Communication Association , Bonn , pp. 474-478 , Annual Conference of the International Speech Communication Association 2017 , Stockholm , Sweden , 20/08/17 . https://doi.org/10.21437/Interspeech.2017-746","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:researchrepository.murdoch.edu.au:39984","is_oa":false,"landing_page_url":"http://researchrepository.murdoch.edu.au/id/eprint/39984/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400274","display_name":"Murdoch Research Repository (Murdoch University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I176790772","host_organization_name":"Murdoch University","host_organization_lineage":["https://openalex.org/I176790772"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"  Xia, X., Togneri, R., Sohel, F. &lt;http://researchrepository.murdoch.edu.au/view/author/Sohel, Ferdous.html&gt; and Huang, D.   (2017)  Frame-Wise dynamic threshold based polyphonic acoustic event detection.   In: INTERSPEECH 2017, 20 - 24 August 2017, Stockholm, Sweden   ","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1650531274","https://openalex.org/W2016413609","https://openalex.org/W2074764925","https://openalex.org/W2095705004","https://openalex.org/W2132083787","https://openalex.org/W2134426937","https://openalex.org/W2135131618","https://openalex.org/W2160815625","https://openalex.org/W2408239454","https://openalex.org/W2622742434","https://openalex.org/W2752592287"],"related_works":["https://openalex.org/W2136763963","https://openalex.org/W2109705048","https://openalex.org/W2940588515","https://openalex.org/W1909151225","https://openalex.org/W1987783679","https://openalex.org/W2160030256","https://openalex.org/W1521297879","https://openalex.org/W4253235840","https://openalex.org/W3151937861","https://openalex.org/W2157423375"],"abstract_inverted_index":{"Acoustic":[0,106],"event":[1,8,35],"detection,":[2],"the":[3,6,11,14,32,43,48,61,71,103,114,118],"determination":[4],"of":[5,13,110,117],"acoustic":[7,34,45],"type":[9],"and":[10,56,89],"localisation":[12],"event,":[15],"has":[16,51],"been":[17],"widely":[18],"applied":[19],"in":[20,81,97],"many":[21],"real-world":[22],"applications.":[23],"Many":[24],"works":[25],"adopt":[26],"multi-label":[27],"classification":[28],"techniques":[29],"to":[30,41,52],"perform":[31],"polyphonic":[33,111],"detection":[36],"with":[37,67,75],"a":[38,76],"global":[39,49],"threshold":[40,50,73,79,93],"detect":[42],"active":[44],"events.":[46],"However,":[47],"be":[53],"set":[54],"manually":[55],"is":[57],"highly":[58],"dependent":[59],"on":[60,102],"database":[62,109],"being":[63],"tested.":[64],"To":[65],"deal":[66],"this,":[68],"we":[69],"replaced":[70],"fixed":[72],"method":[74],"frame-wise":[77],"dynamic":[78,92],"approach":[80],"this":[82,98],"paper.":[83],"Two":[84],"novel":[85],"approaches,":[86],"namely":[87],"contour":[88],"regressor":[90],"based":[91],"approaches":[94],"are":[95],"proposed":[96,119],"work.":[99],"Experimental":[100],"results":[101],"popular":[104],"TUT":[105],"Scenes":[107],"2016":[108],"events":[112],"demonstrated":[113],"superior":[115],"performance":[116],"approaches.":[120]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
