{"id":"https://openalex.org/W3213455868","doi":"https://doi.org/10.1109/icassp43922.2022.9746132","title":"SALSA-Lite: A Fast and Effective Feature for Polyphonic Sound Event Localization and Detection with Microphone Arrays","display_name":"SALSA-Lite: A Fast and Effective Feature for Polyphonic Sound Event Localization and Detection with Microphone Arrays","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3213455868","doi":"https://doi.org/10.1109/icassp43922.2022.9746132","mag":"3213455868"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746132","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746132","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2111.08192","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039913415","display_name":"Thi Ngoc Tho Nguyen","orcid":"https://orcid.org/0000-0002-0210-6373"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Thi Ngoc Tho Nguyen","raw_affiliation_strings":["Nanyang Technological University (NTU),School of Electrical and Electronic Engineering,Singapore","School of Electrical and Electronic Engineering, Nanyang Technological University (NTU), Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University (NTU),School of Electrical and Electronic Engineering,Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University (NTU), Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105462806","display_name":"Douglas L. Jones","orcid":"https://orcid.org/0000-0002-7817-7629"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Douglas L. Jones","raw_affiliation_strings":["University of Illinois at Urbana-Champaign,Dept. of Electrical and Computer Engineering,USA","Dept. of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign,Dept. of Electrical and Computer Engineering,USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076817923","display_name":"Karn N. Watcharasupat","orcid":"https://orcid.org/0000-0002-3878-5048"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Karn N. Watcharasupat","raw_affiliation_strings":["Nanyang Technological University (NTU),School of Electrical and Electronic Engineering,Singapore","School of Electrical and Electronic Engineering, Nanyang Technological University (NTU), Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University (NTU),School of Electrical and Electronic Engineering,Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University (NTU), Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058452657","display_name":"Huy Phan","orcid":"https://orcid.org/0000-0003-4096-785X"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Huy Phan","raw_affiliation_strings":["Queen Mary University of London,School of Electronic Engineering and Computer Science,UK","School of Electronic Engineering and Computer Science, Queen Mary University of London, UK"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,School of Electronic Engineering and Computer Science,UK","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, UK","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072584895","display_name":"Woon\u2010Seng Gan","orcid":"https://orcid.org/0000-0002-7143-1823"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Woon-Seng Gan","raw_affiliation_strings":["Nanyang Technological University (NTU),School of Electrical and Electronic Engineering,Singapore","School of Electrical and Electronic Engineering, Nanyang Technological University (NTU), Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University (NTU),School of Electrical and Electronic Engineering,Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University (NTU), Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5039913415"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":3.9071,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.95403023,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"716","last_page":"720"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8405965566635132},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.622503936290741},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5934070348739624},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5334729552268982},{"id":"https://openalex.org/keywords/polyphony","display_name":"Polyphony","score":0.5317873954772949},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5302885174751282},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5147831439971924},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.41582566499710083},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.1107088029384613},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07081019878387451}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8405965566635132},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.622503936290741},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5934070348739624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5334729552268982},{"id":"https://openalex.org/C128979739","wikidata":"https://www.wikidata.org/wiki/Q179465","display_name":"Polyphony","level":2,"score":0.5317873954772949},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5302885174751282},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5147831439971924},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41582566499710083},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1107088029384613},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07081019878387451},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746132","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746132","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2111.08192","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.08192","pdf_url":"https://arxiv.org/pdf/2111.08192","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2111.08192","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.08192","pdf_url":"https://arxiv.org/pdf/2111.08192","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309327","display_name":"Google","ror":"https://ror.org/00njsd438"},{"id":"https://openalex.org/F4320311649","display_name":"Ministry of Education","ror":"https://ror.org/036nq5137"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1981463705","https://openalex.org/W2066218102","https://openalex.org/W2113638573","https://openalex.org/W2130121545","https://openalex.org/W2136484266","https://openalex.org/W2167962272","https://openalex.org/W2798909945","https://openalex.org/W2810934215","https://openalex.org/W2892163332","https://openalex.org/W2936774411","https://openalex.org/W2942551338","https://openalex.org/W2947942791","https://openalex.org/W2964121744","https://openalex.org/W2998508940","https://openalex.org/W3005741390","https://openalex.org/W3033731578","https://openalex.org/W3083274258","https://openalex.org/W3094550259","https://openalex.org/W3098454764","https://openalex.org/W3120252178","https://openalex.org/W3163193264","https://openalex.org/W3163206520","https://openalex.org/W3163881933","https://openalex.org/W3167721750","https://openalex.org/W3171659463","https://openalex.org/W3174280965","https://openalex.org/W3176079376","https://openalex.org/W3177143793","https://openalex.org/W4287120192","https://openalex.org/W4287766186","https://openalex.org/W4312258136","https://openalex.org/W4324116353","https://openalex.org/W6631190155","https://openalex.org/W6680680088","https://openalex.org/W6773738941","https://openalex.org/W6779923105","https://openalex.org/W6782346483","https://openalex.org/W6788349323","https://openalex.org/W6796058780","https://openalex.org/W6796679619","https://openalex.org/W6797253786","https://openalex.org/W6797669297"],"related_works":["https://openalex.org/W2411659965","https://openalex.org/W2387677326","https://openalex.org/W4200063482","https://openalex.org/W2357575019","https://openalex.org/W2370117122","https://openalex.org/W2530685530","https://openalex.org/W2360603947","https://openalex.org/W4375868962","https://openalex.org/W2897924318","https://openalex.org/W2138997758"],"abstract_inverted_index":{"Polyphonic":[0],"sound":[1],"event":[2],"localization":[3,169],"and":[4,14,43,144,167,173],"detection":[5],"(SELD)":[6],"has":[7,22],"many":[8],"practical":[9],"applications":[10],"in":[11],"acoustic":[12],"sensing":[13],"monitoring.":[15],"However,":[16],"the":[17,26,81,86,116,123,132,140,147],"development":[18],"of":[19,30,58,75,85,151],"real-time":[20],"SELD":[21,33,48],"been":[23],"limited":[24],"by":[25,171],"demanding":[27],"computational":[28],"requirement":[29],"most":[31],"recent":[32],"systems.":[34],"In":[35,92],"this":[36],"work,":[37],"we":[38],"introduce":[39],"SALSA-Lite,":[40],"a":[41,55,59,111],"fast":[42],"effective":[44],"feature":[45,63,134,149],"for":[46,64,70],"polyphonic":[47,65],"using":[49,160,178],"microphone":[50],"array":[51],"inputs.":[52],"SALSA-Lite":[53,101,133,161],"is":[54],"lightweight":[56],"variation":[57],"previously":[60],"proposed":[61],"SALSA":[62,118,142],"SELD.":[66],"SALSA,":[67,95],"which":[68,96],"stands":[69],"Spatial":[71,125],"Cue-Augmented":[72],"Log-Spectrogram,":[73],"consists":[74],"multichannel":[76,152,179],"log-spectrograms":[77],"stacked":[78],"channel-wise":[79],"with":[80,155,182],"normalized":[82,103],"principal":[83],"eigenvectors":[84],"spectrotemporally":[87],"corresponding":[88],"spatial":[89,99,108],"covariance":[90],"matrices.":[91],"contrast":[93],"to":[94,115,139,177],"uses":[97,102],"eigenvector-based":[98],"features,":[100,109],"inter-channel":[104],"phase":[105],"differences":[106],"as":[107],"allowing":[110],"30-fold":[112],"speedup":[113],"compared":[114,138,176],"original":[117],"feature.":[119],"Experimental":[120],"results":[121],"on":[122],"TAU-NIGENS":[124],"Sound":[126],"Events":[127],"2021":[128],"dataset":[129],"showed":[130],"that":[131],"achieved":[135],"competitive":[136],"performance":[137],"full":[141],"feature,":[143],"significantly":[145],"outperformed":[146],"traditional":[148],"set":[150],"log-mel":[153,180],"spectrograms":[154,181],"generalized":[156,183],"cross-correlation":[157,184],"spectra.":[158,185],"Specifically,":[159],"features":[162],"increased":[163],"localization-dependent":[164],"F1":[165],"score":[166],"class-dependent":[168],"recall":[170],"15%":[172],"5%,":[174],"respectively,":[175]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":2}],"updated_date":"2026-03-25T14:56:36.534964","created_date":"2021-11-22T00:00:00"}
