{"id":"https://openalex.org/W4372346562","doi":"https://doi.org/10.1109/icassp49357.2023.10095606","title":"Semantically-Informed Deep Neural Networks For Sound Recognition","display_name":"Semantically-Informed Deep Neural Networks For Sound Recognition","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372346562","doi":"https://doi.org/10.1109/icassp49357.2023.10095606"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-04476407/document","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005840108","display_name":"Michele Esposito","orcid":"https://orcid.org/0000-0002-7659-6520"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Michele Esposito","raw_affiliation_strings":["Maastricht University,Department of Cognitive Neuroscience,Maastricht,The Netherlands","Department of Cognitive Neuroscience, Maastricht University, Maastricht, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Maastricht University,Department of Cognitive Neuroscience,Maastricht,The Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"Department of Cognitive Neuroscience, Maastricht University, Maastricht, The Netherlands","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083404139","display_name":"Giancarlo Valente","orcid":"https://orcid.org/0000-0002-4525-3509"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Giancarlo Valente","raw_affiliation_strings":["Maastricht University,Department of Cognitive Neuroscience,Maastricht,The Netherlands","Department of Cognitive Neuroscience, Maastricht University, Maastricht, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Maastricht University,Department of Cognitive Neuroscience,Maastricht,The Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"Department of Cognitive Neuroscience, Maastricht University, Maastricht, The Netherlands","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024556909","display_name":"Yenisel Plasencia-Cala\u00f1a","orcid":"https://orcid.org/0000-0001-5173-0394"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Yenisel Plasencia-Cala\u00f1a","raw_affiliation_strings":["Maastricht University,BISS Institute,Maastricht,The Netherlands","BISS Institute, Maastricht University, Maastricht, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Maastricht University,BISS Institute,Maastricht,The Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"BISS Institute, Maastricht University, Maastricht, The Netherlands","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044836472","display_name":"Michel Dumontier","orcid":"https://orcid.org/0000-0003-4727-9435"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Michel Dumontier","raw_affiliation_strings":["Maastricht University,Institute of Data Science,Maastricht,The Netherlands","Institute of Data Science, Maastricht University, Maastricht, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Maastricht University,Institute of Data Science,Maastricht,The Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"Institute of Data Science, Maastricht University, Maastricht, The Netherlands","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060967255","display_name":"Bruno L. Giordano","orcid":"https://orcid.org/0000-0001-7002-0486"},"institutions":[{"id":"https://openalex.org/I4210138220","display_name":"Institut de Neurosciences de la Timone","ror":"https://ror.org/043hw6336","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I21491767","https://openalex.org/I4210096427","https://openalex.org/I4210138220"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Bruno L. Giordano","raw_affiliation_strings":["Universit&#x00E9; Aix-Marseille,Institut des Neurosciences de La Timone,Marseille,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universit&#x00E9; Aix-Marseille,Institut des Neurosciences de La Timone,Marseille,France","institution_ids":["https://openalex.org/I4210138220"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000413698","display_name":"Elia Formisano","orcid":"https://orcid.org/0000-0001-5008-2460"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Elia Formisano","raw_affiliation_strings":["Maastricht University,Department of Cognitive Neuroscience,Maastricht,The Netherlands","BISS Institute, Maastricht University, Maastricht, The Netherlands","Department of Cognitive Neuroscience, Maastricht University, Maastricht, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Maastricht University,Department of Cognitive Neuroscience,Maastricht,The Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"BISS Institute, Maastricht University, Maastricht, The Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"Department of Cognitive Neuroscience, Maastricht University, Maastricht, The Netherlands","institution_ids":["https://openalex.org/I34352273"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5005840108"],"corresponding_institution_ids":["https://openalex.org/I34352273"],"apc_list":null,"apc_paid":null,"fwci":0.3829,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.53351074,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.7604956030845642},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.7602525949478149},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7570382356643677},{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.5505585074424744},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5462141633033752},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5198053121566772},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4261340796947479},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4217830002307892},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.4128972291946411},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4091613292694092},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.15664741396903992}],"concepts":[{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.7604956030845642},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.7602525949478149},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7570382356643677},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.5505585074424744},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5462141633033752},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5198053121566772},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4261340796947479},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4217830002307892},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.4128972291946411},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4091613292694092},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.15664741396903992},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C114793014","wikidata":"https://www.wikidata.org/wiki/Q52109","display_name":"Geomorphology","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-04476407v1","is_oa":true,"landing_page_url":"https://hal.science/hal-04476407","pdf_url":"https://hal.science/hal-04476407/document","source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://2023.ieeeicassp.org/","raw_type":"Conference papers"},{"id":"pmh:oai:cris.maastrichtuniversity.nl:publications/52555651-855c-482d-9171-00a2f2bdc5dd","is_oa":true,"landing_page_url":"https://cris.maastrichtuniversity.nl/en/publications/52555651-855c-482d-9171-00a2f2bdc5dd","pdf_url":"https://cris.maastrichtuniversity.nl/ws/files/187333611/Esposito-2023-Semantically-Informed-Deep-Neural-Networks-For-Sound-Recognition.pdf","source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Esposito, M, Valente, G, Plasencia-Cala\u00f1a, Y, Dumontier, M, Giordano, B L & Formisano, E 2023, Semantically-Informed Deep Neural Networks For Sound Recognition. in ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing, Proceedings. IEEE, ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, vol. 2023-June, 48th IEEE International Conference on Acoustics, Speech and Signal Processing, Rhodes Island, Greece, 4/06/23. https://doi.org/10.1109/ICASSP49357.2023.10095606","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:cris.maastrichtuniversity.nl:openaire/52555651-855c-482d-9171-00a2f2bdc5dd","is_oa":true,"landing_page_url":"https://cris.maastrichtuniversity.nl/files/187333611/Esposito-2023-Semantically-Informed-Deep-Neural-Networks-For-Sound-Recognition.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Esposito, M, Valente, G, Plasencia-Cala\u00f1a, Y, Dumontier, M, Giordano, B L & Formisano, E 2023, Semantically-Informed Deep Neural Networks For Sound Recognition. in ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing, Proceedings. IEEE, ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings, vol. 2023-June, 48th IEEE International Conference on Acoustics, Speech and Signal Processing, Rhodes Island, Greece, 4/06/23. https://doi.org/10.1109/ICASSP49357.2023.10095606","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-04476407v1","is_oa":true,"landing_page_url":"https://hal.science/hal-04476407","pdf_url":"https://hal.science/hal-04476407/document","source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://2023.ieeeicassp.org/","raw_type":"Conference papers"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6899999976158142,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4372346562.pdf"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W197865394","https://openalex.org/W1614298861","https://openalex.org/W1836465849","https://openalex.org/W1980907054","https://openalex.org/W2038484192","https://openalex.org/W2052666245","https://openalex.org/W2075665712","https://openalex.org/W2095705004","https://openalex.org/W2160654481","https://openalex.org/W2250539671","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2800311957","https://openalex.org/W2896457183","https://openalex.org/W3094550259","https://openalex.org/W3161217709","https://openalex.org/W4205689591","https://openalex.org/W4289288178","https://openalex.org/W4292397060","https://openalex.org/W4298000016","https://openalex.org/W4321473516","https://openalex.org/W4327569188","https://openalex.org/W4372266552","https://openalex.org/W6638667902","https://openalex.org/W6674330103","https://openalex.org/W6786451267","https://openalex.org/W6850311597"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W4312814274","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2023946029"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"(DNNs)":[3],"for":[4],"sound":[5,12,19,106,117],"recognition":[6,107],"learn":[7],"to":[8,29,70],"categorize":[9],"a":[10,14,17,21,66,87,121],"barking":[11],"as":[13,20],"\"dog\"":[15],"and":[16,73,109,144,148],"meowing":[18],"\"cat\"":[22],"but":[23],"do":[24],"not":[25],"exploit":[26,49],"information":[27,52],"inherent":[28],"the":[30,54,76,80,136],"semantic":[31,51,77,114],"relations":[32,115],"between":[33,79,116],"classes":[34],"(e.g.,":[35],"both":[36],"are":[37],"animal":[38],"vocalisations).":[39],"Cognitive":[40],"neuroscience":[41],"research,":[42],"however,":[43],"suggests":[44],"that":[45,68,96,110,133],"human":[46,125,139],"listeners":[47,140],"automatically":[48],"higher-level":[50],"on":[53],"sources":[55,81],"besides":[56],"acoustic":[57],"information.":[58],"Inspired":[59],"by":[60],"this":[61],"notion,":[62],"we":[63,131],"introduce":[64],"here":[65],"DNN":[67,147],"learns":[69,75],"recognize":[71],"sounds":[72],"simultaneously":[74],"relation":[78],"(semDNN).":[82],"Comparison":[83],"of":[84,124,128,138],"semDNN":[85,97,134],"with":[86,91],"homologous":[88],"network":[89],"trained":[90],"categorical":[92],"labels":[93],"(catDNN)":[94],"revealed":[95],"produces":[98],"semantically":[99],"more":[100],"accurate":[101],"labelling":[102],"than":[103,142],"catDNN":[104,143],"in":[105],"tasks":[108],"semDNN-embeddings":[111],"preserve":[112],"higherlevel":[113],"sources.":[118],"Importantly,":[119],"through":[120],"model-based":[122],"analysis":[123],"dissimilarity":[126],"ratings":[127],"natural":[129],"sounds,":[130],"show":[132],"approximates":[135],"behaviour":[137],"better":[141],"several":[145],"other":[146],"NLP":[149],"comparison":[150],"models.":[151]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
