{"id":"https://openalex.org/W2945399614","doi":"https://doi.org/10.23919/fruct.2019.8711906","title":"End-to-end Convolutional Neural Networks for Sound Event Detection in Urban Environments","display_name":"End-to-end Convolutional Neural Networks for Sound Event Detection in Urban Environments","publication_year":2019,"publication_date":"2019-04-01","ids":{"openalex":"https://openalex.org/W2945399614","doi":"https://doi.org/10.23919/fruct.2019.8711906","mag":"2945399614"},"language":"en","primary_location":{"id":"doi:10.23919/fruct.2019.8711906","is_oa":false,"landing_page_url":"https://doi.org/10.23919/fruct.2019.8711906","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 24th Conference of Open Innovations Association (FRUCT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doaj.org/article/2b860a11a63642d2a44dc1fcc40f0213","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075959503","display_name":"Pablo Zinemanas","orcid":"https://orcid.org/0000-0002-9372-730X"},"institutions":[{"id":"https://openalex.org/I180910786","display_name":"Universidad de la Rep\u00fablica","ror":"https://ror.org/030bbe882","country_code":"UY","type":"education","lineage":["https://openalex.org/I180910786"]}],"countries":["UY"],"is_corresponding":true,"raw_author_name":"Pablo Zinemanas","raw_affiliation_strings":["Facultad de Ingenieria, Universidad de la Rep\u00fablica Montevideo, Uruguay"],"affiliations":[{"raw_affiliation_string":"Facultad de Ingenieria, Universidad de la Rep\u00fablica Montevideo, Uruguay","institution_ids":["https://openalex.org/I180910786"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011472959","display_name":"Pablo Cancela","orcid":null},"institutions":[{"id":"https://openalex.org/I180910786","display_name":"Universidad de la Rep\u00fablica","ror":"https://ror.org/030bbe882","country_code":"UY","type":"education","lineage":["https://openalex.org/I180910786"]}],"countries":["UY"],"is_corresponding":false,"raw_author_name":"Pablo Cancela","raw_affiliation_strings":["Facultad de Ingenieria, Universidad de la Rep\u00fablica Montevideo, Uruguay"],"affiliations":[{"raw_affiliation_string":"Facultad de Ingenieria, Universidad de la Rep\u00fablica Montevideo, Uruguay","institution_ids":["https://openalex.org/I180910786"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023777950","display_name":"Mart\u00edn Rocamora","orcid":"https://orcid.org/0000-0003-3183-9717"},"institutions":[{"id":"https://openalex.org/I180910786","display_name":"Universidad de la Rep\u00fablica","ror":"https://ror.org/030bbe882","country_code":"UY","type":"education","lineage":["https://openalex.org/I180910786"]}],"countries":["UY"],"is_corresponding":false,"raw_author_name":"Martin Rocamora","raw_affiliation_strings":["Facultad de Ingenieria, Universidad de la Rep\u00fablica Montevideo, Uruguay"],"affiliations":[{"raw_affiliation_string":"Facultad de Ingenieria, Universidad de la Rep\u00fablica Montevideo, Uruguay","institution_ids":["https://openalex.org/I180910786"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5075959503"],"corresponding_institution_ids":["https://openalex.org/I180910786"],"apc_list":null,"apc_paid":null,"fwci":3.0044,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.92058179,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"533","last_page":"539"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9830999970436096,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.8277592658996582},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8002607822418213},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.7949973344802856},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7616199254989624},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.6965246200561523},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5434877872467041},{"id":"https://openalex.org/keywords/filter-bank","display_name":"Filter bank","score":0.5314423441886902},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5170984268188477},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.4708625376224518},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46062013506889343},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.41477128863334656},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4068099856376648},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.39127442240715027},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.17386165261268616}],"concepts":[{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.8277592658996582},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8002607822418213},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.7949973344802856},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7616199254989624},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.6965246200561523},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5434877872467041},{"id":"https://openalex.org/C100515483","wikidata":"https://www.wikidata.org/wiki/Q3268235","display_name":"Filter bank","level":3,"score":0.5314423441886902},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5170984268188477},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.4708625376224518},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46062013506889343},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.41477128863334656},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4068099856376648},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.39127442240715027},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.17386165261268616},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.23919/fruct.2019.8711906","is_oa":false,"landing_page_url":"https://doi.org/10.23919/fruct.2019.8711906","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 24th Conference of Open Innovations Association (FRUCT)","raw_type":"proceedings-article"},{"id":"pmh:oai:doaj.org/article:2b860a11a63642d2a44dc1fcc40f0213","is_oa":true,"landing_page_url":"https://doaj.org/article/2b860a11a63642d2a44dc1fcc40f0213","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the XXth Conference of Open Innovations Association FRUCT, Vol 854, Iss 24, Pp 533-539 (2019)","raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:doaj.org/article:2b860a11a63642d2a44dc1fcc40f0213","is_oa":true,"landing_page_url":"https://doaj.org/article/2b860a11a63642d2a44dc1fcc40f0213","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the XXth Conference of Open Innovations Association FRUCT, Vol 854, Iss 24, Pp 533-539 (2019)","raw_type":"article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","score":0.8500000238418579,"id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1496704041","https://openalex.org/W1538221620","https://openalex.org/W1686810756","https://openalex.org/W2038484192","https://openalex.org/W2059652044","https://openalex.org/W2097117768","https://openalex.org/W2163605009","https://openalex.org/W2196961052","https://openalex.org/W2408093180","https://openalex.org/W2408239454","https://openalex.org/W2559688696","https://openalex.org/W2566935005","https://openalex.org/W2591013610","https://openalex.org/W2604293017","https://openalex.org/W2622742434","https://openalex.org/W2756815061","https://openalex.org/W2759976799","https://openalex.org/W2763761345","https://openalex.org/W2766930522","https://openalex.org/W2767858146","https://openalex.org/W2771361008","https://openalex.org/W2773111588","https://openalex.org/W2794150026","https://openalex.org/W2805041786","https://openalex.org/W2899201823","https://openalex.org/W2962813390","https://openalex.org/W2963071736","https://openalex.org/W2963517948","https://openalex.org/W2963881567","https://openalex.org/W6632221327","https://openalex.org/W6637373629","https://openalex.org/W6684191040","https://openalex.org/W6713548365","https://openalex.org/W6730401039","https://openalex.org/W6736350746","https://openalex.org/W6739564277","https://openalex.org/W6744964450","https://openalex.org/W6745878906","https://openalex.org/W6747172621"],"related_works":["https://openalex.org/W2936488316","https://openalex.org/W3091785813","https://openalex.org/W4310880831","https://openalex.org/W2811390910","https://openalex.org/W2146076056","https://openalex.org/W4312376745","https://openalex.org/W2913302899","https://openalex.org/W2016839265","https://openalex.org/W2014986661","https://openalex.org/W2945399614"],"abstract_inverted_index":{"We":[0,86,138,162],"present":[1,87],"a":[2,26,42,48,88,96,115,132],"novel":[3,89],"approach":[4,118],"to":[5,63,67,75,91,119,187],"tackle":[6],"the":[7,31,37,52,68,72,84,93,121,124,135,141,150,165,168,174,188,192],"problem":[8],"of":[9,25,58,71,82,123,134,143,194],"sound":[10],"event":[11],"detection":[12],"(SED)":[13],"in":[14,79,105],"urban":[15,160],"environments":[16],"using":[17,95],"end-to-end":[18],"convolutional":[19],"neural":[20,97,136],"networks":[21],"(CNN).":[22],"It":[23],"consists":[24],"1D":[27],"CNN":[28,50],"for":[29,51,157],"extracting":[30],"energy":[32,122,128],"on":[33,41,159],"mel-frequency":[34],"bands":[35],"from":[36],"audio":[38],"signal":[39],"based":[40],"simple":[43],"filter":[44,169],"bank,":[45],"followed":[46],"by":[47,149],"2D":[49],"classification":[53,182],"task.":[54],"The":[55,178],"main":[56],"goal":[57],"this":[59,144,154],"two-stage":[60],"architecture":[61],"is":[62,155],"bring":[64],"more":[65],"interpretability":[66],"first":[69],"layers":[70],"network":[73,98,151],"and":[74,108,152],"permit":[76],"their":[77],"reutilization":[78],"other":[80],"problems":[81],"same":[83],"domain.":[85],"model":[90],"calculate":[92],"mel-spectrogam":[94],"that":[99,184],"outperforms":[100],"an":[101],"existing":[102],"work,":[103],"both":[104],"its":[106,109],"simplicity":[107],"matching":[110],"performance.":[111],"Also,":[112],"we":[113],"implement":[114],"recently":[116],"proposed":[117],"normalize":[120],"mel-spectrogram":[125],"(per":[126],"channel":[127],"normalization'":[129],"PCEN)":[130],"as":[131,171,173],"layer":[133],"network.":[137],"show":[139],"how":[140,164],"parameters":[142,195],"normalization":[145,176],"can":[146],"be":[147],"learned":[148],"why":[153],"useful":[156],"SED":[158],"environments.":[161],"study":[163],"training":[166],"modifies":[167],"bank":[170],"well":[172],"PCEN":[175],"parameters.":[177],"obtained":[179],"system":[180],"achieves":[181],"results":[183],"are":[185],"comparable":[186],"state-of-the-art,":[189],"while":[190],"decreasing":[191],"number":[193],"involved.":[196]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
