{"id":"https://openalex.org/W3203177955","doi":"https://doi.org/10.1109/taslp.2022.3173054","title":"SALSA: Spatial Cue-Augmented Log-Spectrogram Features for Polyphonic Sound Event Localization and Detection","display_name":"SALSA: Spatial Cue-Augmented Log-Spectrogram Features for Polyphonic Sound Event Localization and Detection","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W3203177955","doi":"https://doi.org/10.1109/taslp.2022.3173054","mag":"3203177955"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2022.3173054","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3173054","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2110.00275","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039913415","display_name":"Thi Ngoc Tho Nguyen","orcid":"https://orcid.org/0000-0002-0210-6373"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Thi Ngoc Tho Nguyen","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-0210-6373","affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076817923","display_name":"Karn N. Watcharasupat","orcid":"https://orcid.org/0000-0002-3878-5048"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Karn N. Watcharasupat","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-3878-5048","affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065667852","display_name":"Ngoc Khanh Nguyen","orcid":"https://orcid.org/0000-0001-8240-6167"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ngoc Khanh Nguyen","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105462806","display_name":"Douglas L. Jones","orcid":"https://orcid.org/0000-0002-7817-7629"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Douglas L. Jones","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072584895","display_name":"Woon\u2010Seng Gan","orcid":"https://orcid.org/0000-0002-7143-1823"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Woon-Seng Gan","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-7143-1823","affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5039913415"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":7.886,"has_fulltext":false,"cited_by_count":56,"citation_normalized_percentile":{"value":0.98298866,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"30","issue":null,"first_page":"1749","last_page":"1762"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8780096173286438},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5718016028404236},{"id":"https://openalex.org/keywords/direction-of-arrival","display_name":"Direction of arrival","score":0.5486468076705933},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5059999823570251},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.4768570363521576},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4733562469482422},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46844226121902466},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.453867644071579},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4221649467945099},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40022480487823486}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8780096173286438},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5718016028404236},{"id":"https://openalex.org/C172051844","wikidata":"https://www.wikidata.org/wiki/Q5280438","display_name":"Direction of arrival","level":3,"score":0.5486468076705933},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5059999823570251},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.4768570363521576},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4733562469482422},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46844226121902466},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.453867644071579},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4221649467945099},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40022480487823486},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C21822782","wikidata":"https://www.wikidata.org/wiki/Q131214","display_name":"Antenna (radio)","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2022.3173054","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3173054","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2110.00275","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2110.00275","pdf_url":"https://arxiv.org/pdf/2110.00275","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:dr.ntu.edu.sg:10356/157118","is_oa":false,"landing_page_url":"https://hdl.handle.net/10356/157118","pdf_url":null,"source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2110.00275","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2110.00275","pdf_url":"https://arxiv.org/pdf/2110.00275","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322724","display_name":"Ministry of Education, India","ror":"https://ror.org/048xjjh50"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W1422086691","https://openalex.org/W1964998538","https://openalex.org/W1969299255","https://openalex.org/W1993678034","https://openalex.org/W2025344720","https://openalex.org/W2051428568","https://openalex.org/W2085156437","https://openalex.org/W2130121545","https://openalex.org/W2139129402","https://openalex.org/W2290075840","https://openalex.org/W2292996718","https://openalex.org/W2509065397","https://openalex.org/W2518102674","https://openalex.org/W2640418943","https://openalex.org/W2672714283","https://openalex.org/W2810934215","https://openalex.org/W2936774411","https://openalex.org/W2942551338","https://openalex.org/W2964681312","https://openalex.org/W2982382207","https://openalex.org/W2982429715","https://openalex.org/W2982680886","https://openalex.org/W2998139081","https://openalex.org/W2998508940","https://openalex.org/W3005741390","https://openalex.org/W3033731578","https://openalex.org/W3081461453","https://openalex.org/W3083274258","https://openalex.org/W3085832277","https://openalex.org/W3091667472","https://openalex.org/W3094550259","https://openalex.org/W3096287167","https://openalex.org/W3098357269","https://openalex.org/W3098454764","https://openalex.org/W3112745223","https://openalex.org/W3117314925","https://openalex.org/W3120252178","https://openalex.org/W3149712154","https://openalex.org/W3163193264","https://openalex.org/W3163206520","https://openalex.org/W3163881933","https://openalex.org/W3171659463","https://openalex.org/W3174280965","https://openalex.org/W3176079376","https://openalex.org/W3177143793","https://openalex.org/W4287120192","https://openalex.org/W4287766186","https://openalex.org/W4324116353","https://openalex.org/W6628353819","https://openalex.org/W6779923105","https://openalex.org/W6782995065","https://openalex.org/W6784117923","https://openalex.org/W6796679619","https://openalex.org/W6797669297","https://openalex.org/W6922472746"],"related_works":["https://openalex.org/W1879255185","https://openalex.org/W2120442551","https://openalex.org/W2769861442","https://openalex.org/W1980506188","https://openalex.org/W2900122540","https://openalex.org/W4240587264","https://openalex.org/W2011788874","https://openalex.org/W2041060376","https://openalex.org/W2963983801","https://openalex.org/W3119734852"],"abstract_inverted_index":{"Sound":[0,163],"event":[1,13,20],"localization":[2,193,220],"and":[3,15,76,152,192,219,226],"detection":[4,14,21],"(SELD)":[5],"consists":[6,92],"of":[7,93,103,181],"two":[8,56],"subtasks,":[9],"which":[10,81],"are":[11,140],"sound":[12,19,30,87],"direction-of-arrival":[16,32],"estimation.":[17],"While":[18],"mainly":[22],"relies":[23],"on":[24,114,159],"time-frequency":[25,70,111],"patterns":[26],"to":[27,41,52,126,201,232],"distinguish":[28],"different":[29,143],"classes,":[31],"estimation":[33],"uses":[34],"amplitude":[35,128],"and/or":[36,129],"phase":[37,130],"differences":[38,131],"between":[39,72,132],"microphones":[40],"estimate":[42],"source":[43,78],"directions.":[44],"As":[45,135],"a":[46,60,136],"result,":[47,137],"it":[48],"is":[49,82],"often":[50],"difficult":[51],"jointly":[53],"optimize":[54],"these":[55],"subtasks.":[57],"We":[58],"propose":[59],"novel":[61],"feature":[62,91],"called":[63],"<i>Spatial":[64],"cue-Augmented":[65],"Log-SpectrogrAm</i>":[66],"(SALSA)":[67],"with":[68,98,167,206,237],"exact":[69],"mapping":[71],"the":[73,77,99,104,115,119,133,160,179,185,189,202,210],"signal":[74],"power":[75],"directional":[79,168],"cues,":[80],"crucial":[83],"for":[84,142],"resolving":[85],"overlapping":[86],"sources.":[88],"The":[89],"SALSA":[90,138,172,182,214],"multichannel":[94,153,203,234],"log-spectrograms":[95],"stacked":[96],"along":[97],"normalized":[100,124],"principal":[101,120],"eigenvector":[102,121],"spatial":[105],"covariance":[106],"matrix":[107],"at":[108],"each":[109],"corresponding":[110],"bin.":[112],"Depending":[113],"microphone":[116,144,154],"array":[117,145,155],"format,":[118,212],"can":[122],"be":[123],"differently":[125],"extract":[127],"microphones.":[134],"features":[139,173,183,215],"applicable":[141],"formats":[146],"such":[147],"as":[148],"first-order":[149],"ambisonics":[150],"(FOA)":[151],"(MIC).":[156],"Experimental":[157],"results":[158],"TAU-NIGENS":[161],"Spatial":[162],"Events":[164],"2021":[165],"dataset":[166],"interferences":[169],"showed":[170],"that":[171],"outperformed":[174],"other":[175],"state-of-the-art":[176],"features.":[177],"Specifically,":[178],"use":[180],"in":[184],"FOA":[186],"format":[187],"increased":[188,216],"F1":[190,217],"score":[191,218],"recall":[194,221],"by":[195,222],"<inline-formula><tex-math":[196,223,227],"notation=\"LaTeX\">$6":[197],"\\,\\%$</tex-math></inline-formula>":[198,225],"each,":[199],"compared":[200,231],"log-mel":[204,235],"spectrograms":[205,236],"intensity":[207],"vectors.":[208],"For":[209],"MIC":[211],"using":[213,233],"notation=\"LaTeX\">$16":[224],"notation=\"LaTeX\">$7":[228],"\\,\\%$</tex-math></inline-formula>,":[229],"respectively,":[230],"generalized":[238],"cross-correlation":[239],"spectra.":[240]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
