{"id":"https://openalex.org/W3206619923","doi":"https://doi.org/10.1109/icassp43922.2022.9746754","title":"Spatial Data Augmentation with Simulated Room Impulse Responses for Sound Event Localization and Detection","display_name":"Spatial Data Augmentation with Simulated Room Impulse Responses for Sound Event Localization and Detection","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3206619923","doi":"https://doi.org/10.1109/icassp43922.2022.9746754","mag":"3206619923"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746754","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746754","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017785136","display_name":"Yuichiro Koyama","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yuichiro Koyama","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027795031","display_name":"Kazuhide Shigemi","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuhide Shigemi","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035750216","display_name":"Masafumi Takahashi","orcid":"https://orcid.org/0000-0002-3268-8386"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Masafumi Takahashi","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051965288","display_name":"Kazuki Shimada","orcid":"https://orcid.org/0000-0001-5389-2346"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kazuki Shimada","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101589290","display_name":"Naoya Takahashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naoya Takahashi","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011937407","display_name":"Emiru Tsunoo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Emiru Tsunoo","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104117184","display_name":"Shusuke Takahashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shusuke Takahashi","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088754502","display_name":"Yuki Mitsufuji","orcid":"https://orcid.org/0000-0002-6806-6140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuki Mitsufuji","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5017785136"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.471,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.83949447,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"8872","last_page":"8876"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7654467821121216},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.6908559203147888},{"id":"https://openalex.org/keywords/impulse","display_name":"Impulse (physics)","score":0.643657922744751},{"id":"https://openalex.org/keywords/impulse-response","display_name":"Impulse response","score":0.6352602243423462},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.5783437490463257},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.5020482540130615},{"id":"https://openalex.org/keywords/interference","display_name":"Interference (communication)","score":0.45210549235343933},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4177149534225464},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.4115390181541443},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.3911799192428589},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3885194659233093},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36555445194244385},{"id":"https://openalex.org/keywords/sound-pressure","display_name":"Sound pressure","score":0.11564314365386963},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0929974615573883},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.08990171551704407},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08912765979766846},{"id":"https://openalex.org/keywords/remote-sensing","display_name":"Remote sensing","score":0.08761948347091675}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7654467821121216},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.6908559203147888},{"id":"https://openalex.org/C70836080","wikidata":"https://www.wikidata.org/wiki/Q837940","display_name":"Impulse (physics)","level":2,"score":0.643657922744751},{"id":"https://openalex.org/C72279823","wikidata":"https://www.wikidata.org/wiki/Q1139726","display_name":"Impulse response","level":2,"score":0.6352602243423462},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.5783437490463257},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.5020482540130615},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.45210549235343933},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4177149534225464},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.4115390181541443},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.3911799192428589},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3885194659233093},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36555445194244385},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.11564314365386963},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0929974615573883},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.08990171551704407},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08912765979766846},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.08761948347091675},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746754","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746754","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1740587110","https://openalex.org/W2034119727","https://openalex.org/W2168729028","https://openalex.org/W2400339399","https://openalex.org/W2514127661","https://openalex.org/W2541714600","https://openalex.org/W2570915410","https://openalex.org/W2622263826","https://openalex.org/W2763188033","https://openalex.org/W2772289775","https://openalex.org/W2810934215","https://openalex.org/W2889373351","https://openalex.org/W2918022041","https://openalex.org/W2936774411","https://openalex.org/W2942551338","https://openalex.org/W2963543871","https://openalex.org/W2972818416","https://openalex.org/W2982680886","https://openalex.org/W2998139081","https://openalex.org/W3005741390","https://openalex.org/W3007328579","https://openalex.org/W3033731578","https://openalex.org/W3098454764","https://openalex.org/W3117609054","https://openalex.org/W3120252178","https://openalex.org/W3163193264","https://openalex.org/W3163206520","https://openalex.org/W3163881933","https://openalex.org/W3171659463","https://openalex.org/W3174280965","https://openalex.org/W3176079376","https://openalex.org/W4287120192","https://openalex.org/W4287766186","https://openalex.org/W4394669361","https://openalex.org/W6729017730","https://openalex.org/W6739622702","https://openalex.org/W6760529994","https://openalex.org/W6773738941","https://openalex.org/W6779923105","https://openalex.org/W6786608192","https://openalex.org/W6788349323","https://openalex.org/W6796679619","https://openalex.org/W6797253786"],"related_works":["https://openalex.org/W1879255185","https://openalex.org/W2769861442","https://openalex.org/W1980506188","https://openalex.org/W2900122540","https://openalex.org/W4240587264","https://openalex.org/W1975973603","https://openalex.org/W2963983801","https://openalex.org/W3119734852","https://openalex.org/W2804363668","https://openalex.org/W1515932869"],"abstract_inverted_index":{"Recording":[0],"and":[1,11,18,106,170],"annotating":[2],"real":[3],"sound":[4,8,66,113],"events":[5,55,114],"for":[6,172],"a":[7,40,93,118],"event":[9],"localization":[10],"detection":[12],"(SELD)":[13],"task":[14],"is":[15,30],"time":[16],"consuming,":[17],"data":[19,29],"augmentation":[20],"techniques":[21],"are":[22,103,115,123],"often":[23],"favored":[24],"when":[25],"the":[26,36,107,111,127,142,151,156,168,176],"amount":[27],"of":[28,110],"limited.":[31],"However,":[32],"how":[33],"to":[34,59,92,97,131,154,166],"augment":[35],"spatial":[37,62,82],"information":[38],"in":[39,100],"dataset,":[41],"including":[42],"unlabeled":[43],"directional":[44,53],"interference":[45,54],"events,":[46],"remains":[47],"an":[48,74,133,163],"open":[49],"research":[50],"question.":[51],"Furthermore,":[52],"make":[56],"it":[57],"difficult":[58],"accurately":[60,104],"extract":[61],"characteristics":[63,83],"from":[64,117],"target":[65,112],"events.":[67],"To":[68],"address":[69],"this":[70],"problem,":[71],"we":[72,161],"propose":[73],"impulse":[75,87],"response":[76],"simulation":[77],"framework":[78],"(IRS)":[79],"that":[80,150],"augments":[81],"using":[84,141],"simulated":[85,121],"room":[86],"responses":[88],"(RIR).":[89],"RIRs":[90,122],"corresponding":[91],"microphone":[94],"array":[95],"assumed":[96],"be":[98],"placed":[99],"various":[101],"rooms":[102],"simulated,":[105],"source":[108,129],"signals":[109,130],"extracted":[116,128],"mixture.":[119],"The":[120],"then":[124],"convolved":[125],"with":[126],"obtain":[132],"augmented":[134],"multi-channel":[135],"training":[136],"dataset.":[137],"Evaluation":[138],"results":[139],"obtained":[140],"TAU-NIGENS":[143],"Spatial":[144],"Sound":[145],"Events":[146],"2021":[147],"dataset":[148],"show":[149],"IRS":[152],"contributes":[153],"improving":[155],"overall":[157],"SELD":[158],"performance.":[159],"Additionally,":[160],"conducted":[162],"ablation":[164],"study":[165],"discuss":[167],"contribution":[169],"need":[171],"each":[173],"component":[174],"within":[175],"IRS.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
