{"id":"https://openalex.org/W4392904420","doi":"https://doi.org/10.1109/icassp48485.2024.10446118","title":"Spatial Scaper: A Library to Simulate and Augment Soundscapes for Sound Event Localization and Detection in Realistic Rooms","display_name":"Spatial Scaper: A Library to Simulate and Augment Soundscapes for Sound Event Localization and Detection in Realistic Rooms","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904420","doi":"https://doi.org/10.1109/icassp48485.2024.10446118"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446118","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446118","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027526258","display_name":"Iran R. Roman","orcid":"https://orcid.org/0000-0003-3781-7244"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Iran R. Roman","raw_affiliation_strings":["New York University,Music and Audio Research Laboratory,New York,USA","Music and Audio Research Laboratory, New York University, New York, USA"],"affiliations":[{"raw_affiliation_string":"New York University,Music and Audio Research Laboratory,New York,USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"Music and Audio Research Laboratory, New York University, New York, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012288092","display_name":"Christopher Ick","orcid":null},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Ick","raw_affiliation_strings":["New York University,Music and Audio Research Laboratory,New York,USA","Music and Audio Research Laboratory, New York University, New York, USA"],"affiliations":[{"raw_affiliation_string":"New York University,Music and Audio Research Laboratory,New York,USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"Music and Audio Research Laboratory, New York University, New York, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Sivan Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sivan Ding","raw_affiliation_strings":["New York University,Music and Audio Research Laboratory,New York,USA","Music and Audio Research Laboratory, New York University, New York, USA"],"affiliations":[{"raw_affiliation_string":"New York University,Music and Audio Research Laboratory,New York,USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"Music and Audio Research Laboratory, New York University, New York, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081976456","display_name":"Adrian S. Roman","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]},{"id":"https://openalex.org/I144571360","display_name":"Viterbo University","ror":"https://ror.org/039p8pn96","country_code":"US","type":"education","lineage":["https://openalex.org/I144571360"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adrian S. Roman","raw_affiliation_strings":["University of Southern California,Viterbi School of Engineering,California,USA","Viterbi School of Engineering, University of Southern California, California, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California,Viterbi School of Engineering,California,USA","institution_ids":["https://openalex.org/I144571360","https://openalex.org/I1174212"]},{"raw_affiliation_string":"Viterbi School of Engineering, University of Southern California, California, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010404092","display_name":"Brian McFee","orcid":"https://orcid.org/0000-0001-6261-9747"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian McFee","raw_affiliation_strings":["New York University,Music and Audio Research Laboratory,New York,USA","Music and Audio Research Laboratory, New York University, New York, USA"],"affiliations":[{"raw_affiliation_string":"New York University,Music and Audio Research Laboratory,New York,USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"Music and Audio Research Laboratory, New York University, New York, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031398497","display_name":"Juan Pablo Bello","orcid":"https://orcid.org/0000-0001-8561-5204"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Juan P. Bello","raw_affiliation_strings":["New York University,Music and Audio Research Laboratory,New York,USA","Music and Audio Research Laboratory, New York University, New York, USA"],"affiliations":[{"raw_affiliation_string":"New York University,Music and Audio Research Laboratory,New York,USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"Music and Audio Research Laboratory, New York University, New York, USA","institution_ids":["https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5027526258"],"corresponding_institution_ids":["https://openalex.org/I57206974"],"apc_list":null,"apc_paid":null,"fwci":6.9139,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.97657658,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1221","last_page":"1225"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7670832872390747},{"id":"https://openalex.org/keywords/soundscape","display_name":"Soundscape","score":0.5426332354545593},{"id":"https://openalex.org/keywords/impulse-response","display_name":"Impulse response","score":0.42852553725242615},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.38350850343704224},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3584592044353485},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3477746248245239},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.33860939741134644}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7670832872390747},{"id":"https://openalex.org/C142795923","wikidata":"https://www.wikidata.org/wiki/Q1358257","display_name":"Soundscape","level":3,"score":0.5426332354545593},{"id":"https://openalex.org/C72279823","wikidata":"https://www.wikidata.org/wiki/Q1139726","display_name":"Impulse response","level":2,"score":0.42852553725242615},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.38350850343704224},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3584592044353485},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3477746248245239},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.33860939741134644},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446118","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446118","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/102554","is_oa":false,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/102554","pdf_url":null,"source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Proceeding"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6205568849","display_name":"III: Medium: Spatial Sound Scene Description","funder_award_id":"1955357","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2771361008","https://openalex.org/W2888793942","https://openalex.org/W2900372165","https://openalex.org/W2982382207","https://openalex.org/W3119877627","https://openalex.org/W3197097128","https://openalex.org/W3206329344","https://openalex.org/W3210404408","https://openalex.org/W4205689591","https://openalex.org/W4239318779","https://openalex.org/W4307481987","https://openalex.org/W4309092811","https://openalex.org/W4324116353","https://openalex.org/W4380994299","https://openalex.org/W4385822950","https://openalex.org/W4386764032","https://openalex.org/W4393782081","https://openalex.org/W6684354967","https://openalex.org/W6853532597","https://openalex.org/W6910757473"],"related_works":["https://openalex.org/W1254698036","https://openalex.org/W2896201871","https://openalex.org/W1988083194","https://openalex.org/W2282503792","https://openalex.org/W2365709658","https://openalex.org/W1562588264","https://openalex.org/W1593731728","https://openalex.org/W2970126706","https://openalex.org/W2770198033","https://openalex.org/W2809061312"],"abstract_inverted_index":{"Sound":[0],"event":[1],"localization":[2],"and":[3,25,67,82,94],"detection":[4],"(SELD)":[5],"is":[6,31,150],"an":[7],"important":[8],"task":[9],"in":[10,22,47,55],"machine":[11],"listening.":[12],"Major":[13],"advancements":[14],"rely":[15],"on":[16],"simulated":[17,32],"data":[18,30,65,101,132],"with":[19,40,130],"sound":[20,41,45,96],"events":[21,46],"specific":[23,56],"rooms":[24,76,121],"strong":[26],"spatio-temporal":[27],"labels.":[28],"SELD":[29,64,110,125,155],"by":[33],"convolving":[34],"spatialy-localized":[35],"room":[36],"impulse":[37],"responses":[38],"(RIRs)":[39],"waveforms":[42],"to":[43,70,108,119,122,134,152],"place":[44],"a":[48,61,113,128,139],"soundscape.":[49],"However,":[50],"RIRs":[51],"require":[52],"manual":[53],"collection":[54],"rooms.":[57],"We":[58],"present":[59],"SpatialScaper,":[60],"library":[62],"for":[63,87],"simulation":[66],"augmentation.":[68],"Compared":[69],"existing":[71,109],"tools,":[72],"SpatialScaper":[73,98,118,149],"emulates":[74],"virtual":[75],"via":[77],"parameters":[78],"such":[79],"as":[80,138],"size":[81],"wall":[83],"absorption.":[84],"This":[85],"allows":[86],"parameterized":[88],"placement":[89],"(including":[90],"movement)":[91],"of":[92,142],"foreground":[93],"background":[95],"sources.":[97],"also":[99],"includes":[100],"augmentation":[102],"pipelines":[103],"that":[104,148],"can":[105],"be":[106],"applied":[107],"data.":[111,126],"As":[112],"case":[114],"study,":[115],"we":[116],"use":[117],"add":[120],"the":[123],"DCASE":[124],"Training":[127],"model":[129],"our":[131],"led":[133],"progressive":[135],"performance":[136],"improves":[137],"direct":[140],"function":[141],"acoustic":[143],"diversity.":[144],"These":[145],"results":[146],"show":[147],"valuable":[151],"train":[153],"robust":[154],"models.":[156]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
