{"id":"https://openalex.org/W2939641110","doi":"https://doi.org/10.1109/icassp.2019.8682376","title":"A Region Based Attention Method for Weakly Supervised Sound Event Detection and Classification","display_name":"A Region Based Attention Method for Weakly Supervised Sound Event Detection and Classification","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2939641110","doi":"https://doi.org/10.1109/icassp.2019.8682376","mag":"2939641110"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8682376","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682376","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101437836","display_name":"Jie Yan","orcid":"https://orcid.org/0000-0001-9512-322X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jie Yan","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100381758","display_name":"Yan Song","orcid":"https://orcid.org/0000-0002-5668-9068"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Song","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038369085","display_name":"Wu Guo","orcid":"https://orcid.org/0000-0002-3779-7944"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wu Guo","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057227915","display_name":"Li-Rong Dai","orcid":"https://orcid.org/0000-0002-0859-2827"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li-Rong Dai","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000620878","display_name":"Ian McLoughlin","orcid":"https://orcid.org/0000-0001-7111-2008"},"institutions":[{"id":"https://openalex.org/I167056439","display_name":"Medway School of Pharmacy","ror":"https://ror.org/00fa9v295","country_code":"GB","type":"education","lineage":["https://openalex.org/I167056439"]},{"id":"https://openalex.org/I20581793","display_name":"University of Kent","ror":"https://ror.org/00xkeyj56","country_code":"GB","type":"education","lineage":["https://openalex.org/I20581793"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ian McLoughlin","raw_affiliation_strings":["School of Computing, University of Kent, Medway, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Kent, Medway, UK","institution_ids":["https://openalex.org/I167056439","https://openalex.org/I20581793"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100334619","display_name":"Liang Chen","orcid":"https://orcid.org/0000-0002-6598-1036"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang Chen","raw_affiliation_strings":["Anhui Science and Technology Research Institute"],"affiliations":[{"raw_affiliation_string":"Anhui Science and Technology Research Institute","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101437836"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":2.4879,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.8987971,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"755","last_page":"759"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7342008352279663},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.6648880243301392},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6396347284317017},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6336064338684082},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5726802945137024},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.568058967590332},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5163899064064026},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.46282821893692017},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43928804993629456},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.4333059787750244},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4233388304710388},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3407025933265686},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08449974656105042}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7342008352279663},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.6648880243301392},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6396347284317017},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6336064338684082},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5726802945137024},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.568058967590332},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5163899064064026},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.46282821893692017},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43928804993629456},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.4333059787750244},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4233388304710388},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3407025933265686},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08449974656105042},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2019.8682376","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682376","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.41999998688697815,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1665214252","https://openalex.org/W1686810756","https://openalex.org/W1844944916","https://openalex.org/W2043194666","https://openalex.org/W2135131618","https://openalex.org/W2147917435","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2526050071","https://openalex.org/W2565639579","https://openalex.org/W2567070169","https://openalex.org/W2593116425","https://openalex.org/W2604490051","https://openalex.org/W2752782242","https://openalex.org/W2775505379","https://openalex.org/W2884011836","https://openalex.org/W2890685186","https://openalex.org/W2963420686","https://openalex.org/W2963723765","https://openalex.org/W2963970792","https://openalex.org/W2964121744","https://openalex.org/W4289329167","https://openalex.org/W6631190155","https://openalex.org/W6637242042","https://openalex.org/W6637373629","https://openalex.org/W6661441714","https://openalex.org/W6686164453","https://openalex.org/W6731370813","https://openalex.org/W6743731764","https://openalex.org/W6747331233","https://openalex.org/W6753516609"],"related_works":["https://openalex.org/W4225394202","https://openalex.org/W4298287631","https://openalex.org/W2953061907","https://openalex.org/W3032952384","https://openalex.org/W3034302643","https://openalex.org/W1847088711","https://openalex.org/W3036642985","https://openalex.org/W2964335273","https://openalex.org/W1889624880","https://openalex.org/W2229372569"],"abstract_inverted_index":{"Recently,":[0],"an":[1,51,130],"attention":[2,69,151],"based":[3,68,81,150],"convolutional":[4,124],"recurrent":[5,132],"neural":[6,133],"network":[7,134],"(CRNN)":[8],"with":[9],"learnable":[10],"gated":[11],"linear":[12],"units":[13],"(GLUs)":[14],"has":[15],"achieved":[16],"state-of-the-art":[17,184],"performance":[18],"for":[19,54,140,176],"audio":[20],"tagging":[21],"(AT)":[22],"and":[23,32,37,45,57,142,159,167,172,178],"sound":[24,116],"event":[25],"detection":[26],"(SED)":[27],"tasks":[28,161],"in":[29,162],"the":[30,74,78,112,147],"Detection":[31],"Classification":[33],"of":[34,43,77,114],"Acoustic":[35],"Scenes":[36],"Events":[38],"(DCASE)":[39],"challenges.":[40],"The":[41],"introduction":[42],"GLU":[44,80,93],"temporal":[46],"attention-based":[47,131],"localization":[48],"mechanisms":[49],"plays":[50],"important":[52],"role":[53],"both":[55],"AT":[56,141,160,169],"SED":[58,143,158,174],"tasks.":[59,144],"In":[60],"this":[61],"paper,":[62],"we":[63,84,97,107,153],"propose":[64],"a":[65,86,99],"novel":[66],"region":[67,109,149],"method":[70],"to":[71,94,102,135],"further":[72],"boost":[73],"representation":[75],"power":[76],"existing":[79],"CRNN.":[82],"Specifically,":[83],"insert":[85],"feature":[87],"selection":[88],"(FS)":[89],"structure":[90],"after":[91],"each":[92],"create":[95],"what":[96],"term":[98],"GLU-F.":[100],"block,":[101],"exploit":[103],"channel":[104],"relationships.":[105],"Furthermore,":[106],"extract":[108],"features":[110],"(or":[111],"prototypes":[113],"certain":[115],"events)":[117],"from":[118],"multi-scale":[119],"sliding":[120],"windows":[121],"over":[122],"higher":[123],"layers,":[125],"which":[126],"are":[127],"fed":[128],"into":[129],"model":[136],"their":[137],"context":[138],"information":[139],"To":[145],"evaluate":[146],"proposed":[148],"method,":[152],"conduct":[154],"extensive":[155],"experiments":[156],"on":[157],"DCASE2017.":[163],"We":[164],"achieve":[165],"59.5%":[166],"60.1%":[168],"F1-score,":[170],"51.3%":[171],"55.1%":[173],"F1-score":[175],"development":[177],"evaluation":[179],"sets":[180],"respectively,":[181],"significantly":[182],"outperforming":[183],"results.":[185]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
