{"id":"https://openalex.org/W3041391240","doi":"https://doi.org/10.1145/3404716.3404726","title":"U Recurrent Neural Network for Polyphonic Sound Event Detection and Localization","display_name":"U Recurrent Neural Network for Polyphonic Sound Event Detection and Localization","publication_year":2020,"publication_date":"2020-05-28","ids":{"openalex":"https://openalex.org/W3041391240","doi":"https://doi.org/10.1145/3404716.3404726","mag":"3041391240"},"language":"en","primary_location":{"id":"doi:10.1145/3404716.3404726","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3404716.3404726","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 5th International Conference on Multimedia Systems and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060936410","display_name":"Lihong Pi","orcid":"https://orcid.org/0000-0003-1816-6220"},"institutions":[{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lihong Pi","raw_affiliation_strings":["Tsinghua University, The Institute of Microelectronics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, The Institute of Microelectronics, Beijing, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I4210119392"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102147241","display_name":"Xue Zheng","orcid":"https://orcid.org/0000-0001-7629-332X"},"institutions":[{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xue Zheng","raw_affiliation_strings":["Tsinghua University, The Institute of Microelectronics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, The Institute of Microelectronics, Beijing, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I4210119392"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074033048","display_name":"Chun Zhang","orcid":"https://orcid.org/0000-0001-9791-4500"},"institutions":[{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chun Zhang","raw_affiliation_strings":["Tsinghua University, The Institute of Microelectronics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, The Institute of Microelectronics, Beijing, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I4210119392"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022592072","display_name":"Ping Chen","orcid":"https://orcid.org/0000-0003-1676-6425"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ping Chen","raw_affiliation_strings":["Beijing Yiemed Medical Technology Co., Ltd, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Yiemed Medical Technology Co., Ltd, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100407628","display_name":"Zhe Wang","orcid":"https://orcid.org/0000-0002-6825-1063"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhe Wang","raw_affiliation_strings":["Beijing Sanping Technology Co., Ltd, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Sanping Technology Co., Ltd, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100460336","display_name":"Xiangyu Li","orcid":"https://orcid.org/0009-0003-7782-1667"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Li","raw_affiliation_strings":["Research Institute of Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Research Institute of Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5060936410"],"corresponding_institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.1515,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.42060566,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"86","last_page":"91"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9851999878883362,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7716429233551025},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.689681887626648},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5651028156280518},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5552760362625122},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5082551836967468},{"id":"https://openalex.org/keywords/polyphony","display_name":"Polyphony","score":0.4801206588745117},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.45850497484207153},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.45639297366142273},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.45596593618392944},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43460845947265625},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3973250985145569},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.1668885052204132}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7716429233551025},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.689681887626648},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5651028156280518},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5552760362625122},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5082551836967468},{"id":"https://openalex.org/C128979739","wikidata":"https://www.wikidata.org/wiki/Q179465","display_name":"Polyphony","level":2,"score":0.4801206588745117},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.45850497484207153},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.45639297366142273},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.45596593618392944},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43460845947265625},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3973250985145569},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1668885052204132},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3404716.3404726","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3404716.3404726","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 5th International Conference on Multimedia Systems and Signal Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1846473900","https://openalex.org/W1901129140","https://openalex.org/W1994616650","https://openalex.org/W2046317813","https://openalex.org/W2097117768","https://openalex.org/W2341412280","https://openalex.org/W2407005679","https://openalex.org/W2591013610","https://openalex.org/W2613718673","https://openalex.org/W2622742434","https://openalex.org/W2748560825","https://openalex.org/W2810934215","https://openalex.org/W2942551338","https://openalex.org/W2949117887","https://openalex.org/W2971670255","https://openalex.org/W6648914341","https://openalex.org/W6941349294"],"related_works":["https://openalex.org/W2411659965","https://openalex.org/W2387677326","https://openalex.org/W4200063482","https://openalex.org/W2357575019","https://openalex.org/W2370117122","https://openalex.org/W2360603947","https://openalex.org/W2371528275","https://openalex.org/W2375454309","https://openalex.org/W2374135200","https://openalex.org/W2390487542"],"abstract_inverted_index":{"The":[0,149],"polyphonic":[1],"sound":[2,17,36,104,174],"event":[3,37],"detection":[4,38],"and":[5,13,22,40,74,92,128,139,157,162,201],"localization":[6],"(SELD)":[7],"system":[8],"indicates":[9],"the":[10,24,28,35,41,44,68,75,79,98,103,126,132,141,146,155,184,189,196,205],"temporal":[11],"onset":[12],"offset":[14],"time":[15],"of":[16,27,43,46,78,97,102,159],"events":[18,175],"to":[19,62,72,120,144,195,204],"be":[20],"detected":[21],"tracks":[23],"spatial":[25],"location":[26],"acoustic":[29],"source.":[30],"It":[31],"involves":[32],"two":[33,63],"processes,":[34],"(SED)":[39],"estimation":[42],"direction":[45],"arrival":[47],"(DOA).":[48],"However,":[49],"previous":[50],"models":[51],"only":[52,89],"extract":[53],"features":[54,101,130],"by":[55,192],"simply":[56],"stacking":[57],"convolutional":[58,207],"layers,":[59],"thus":[60],"leading":[61],"problems,":[64,123],"one":[65],"is":[66,70,81,85,118,152],"that":[67,86,183],"network":[69,116,147,210],"difficult":[71],"deepen":[73],"expressive":[76],"capability":[77],"model":[80,111,133,198],"limited,":[82],"another":[83],"problem":[84],"they":[87],"utilize":[88],"highlevel":[90],"features,":[91],"lack":[93],"a":[94,109],"feature":[95],"description":[96],"low-level":[99,127],"texture":[100],"signal.":[105],"In":[106],"this":[107],"paper,":[108],"novel":[110],"called":[112],"U":[113],"recurrent":[114,208],"neural":[115,209],"(URNN)":[117],"proposed":[119,150,185],"alleviate":[121],"those":[122],"it":[124],"combines":[125],"high-level":[129],"in":[131],"without":[134],"significantly":[135,187],"increasing":[136],"computation":[137],"costs,":[138],"exploits":[140],"identity":[142],"layer":[143],"make":[145],"deeper.":[148],"method":[151],"evaluated":[153],"on":[154],"Detection":[156],"Classification":[158],"Acoustic":[160],"Scenes":[161],"Events":[163],"(DCASE)":[164],"2019":[165],"Task":[166],"3":[167],"dataset":[168],"[1],":[169],"which":[170],"covers":[171],"distinct":[172],"overlapping":[173],"collected":[176],"from":[177],"different":[178],"environments.":[179],"Experimental":[180],"results":[181],"show":[182],"URNN":[186],"reduce":[188],"SELD":[190],"error":[191],"16.2%":[193],"compared":[194,203],"baseline":[197],"SELDnet":[199],"[2]":[200],"2.5%":[202],"improved":[206],"(CRNN).":[211]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
