{"id":"https://openalex.org/W6912081865","doi":"https://doi.org/10.5281/zenodo.15559774","title":"DCASE2025 Task3 Stereo SELD Dataset","display_name":"DCASE2025 Task3 Stereo SELD Dataset","publication_year":2025,"publication_date":"2025-06-01","ids":{"openalex":"https://openalex.org/W6912081865","doi":"https://doi.org/10.5281/zenodo.15559774"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.15559774","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15559774","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.15559774","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Shimada, Kazuki","orcid":"https://orcid.org/0000-0001-5389-2346"},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shimada, Kazuki","raw_affiliation_strings":["Sony AI"],"raw_orcid":"https://orcid.org/0000-0001-5389-2346","affiliations":[{"raw_affiliation_string":"Sony AI","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Politis, Archontis","orcid":"https://orcid.org/0000-0002-0595-2356"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Politis, Archontis","raw_affiliation_strings":["Tampere University"],"raw_orcid":"https://orcid.org/0000-0002-0595-2356","affiliations":[{"raw_affiliation_string":"Tampere University","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Roman, Iran R.","orcid":"https://orcid.org/0000-0003-3781-7244"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Roman, Iran R.","raw_affiliation_strings":["Queen Mary University of London"],"raw_orcid":"https://orcid.org/0000-0003-3781-7244","affiliations":[{"raw_affiliation_string":"Queen Mary University of London","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Sudarsanam, Parthasaarathy","orcid":"https://orcid.org/0009-0009-3751-6469"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Sudarsanam, Parthasaarathy","raw_affiliation_strings":["Tampere University"],"raw_orcid":"https://orcid.org/0009-0009-3751-6469","affiliations":[{"raw_affiliation_string":"Tampere University","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"middle","author":{"id":null,"display_name":"D\u00edaz-Guerra Aparicio, David","orcid":"https://orcid.org/0000-0002-1041-0498"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"D\u00edaz-Guerra Aparicio, David","raw_affiliation_strings":["Tampere University"],"raw_orcid":"https://orcid.org/0000-0002-1041-0498","affiliations":[{"raw_affiliation_string":"Tampere University","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Pandey, Ruchi","orcid":"https://orcid.org/0009-0002-6627-2706"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Pandey, Ruchi","raw_affiliation_strings":["Tampere University"],"raw_orcid":"https://orcid.org/0009-0002-6627-2706","affiliations":[{"raw_affiliation_string":"Tampere University","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Uchida, Kengo","orcid":null},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Uchida, Kengo","raw_affiliation_strings":["Sony AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony AI","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Koyama, Yuichiro","orcid":null},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Koyama, Yuichiro","raw_affiliation_strings":["Sony Group Corporation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony Group Corporation","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Takahashi, Naoya","orcid":null},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Takahashi, Naoya","raw_affiliation_strings":["Sony AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony AI","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Shibuya, Takashi","orcid":"https://orcid.org/0000-0002-4277-0164"},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shibuya, Takashi","raw_affiliation_strings":["Sony AI"],"raw_orcid":"https://orcid.org/0000-0002-4277-0164","affiliations":[{"raw_affiliation_string":"Sony AI","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Takahashi, Shusuke","orcid":null},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Takahashi, Shusuke","raw_affiliation_strings":["Sony Group Corporation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony Group Corporation","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Virtanen, Tuomas","orcid":"https://orcid.org/0000-0002-4604-9729"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Virtanen, Tuomas","raw_affiliation_strings":["Tampere University"],"raw_orcid":"https://orcid.org/0000-0002-4604-9729","affiliations":[{"raw_affiliation_string":"Tampere University","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"last","author":{"id":null,"display_name":"Mitsufuji, Yuki","orcid":"https://orcid.org/0000-0002-6806-6140"},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mitsufuji, Yuki","raw_affiliation_strings":["Sony AI","Sony Group Corporation"],"raw_orcid":"https://orcid.org/0000-0002-6806-6140","affiliations":[{"raw_affiliation_string":"Sony AI","institution_ids":["https://openalex.org/I2800278093"]},{"raw_affiliation_string":"Sony Group Corporation","institution_ids":["https://openalex.org/I2800278093"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":13,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I2800278093"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.6057000160217285},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.531499981880188},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5024999976158142},{"id":"https://openalex.org/keywords/stereophonic-sound","display_name":"Stereophonic sound","score":0.40230000019073486},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4016000032424927},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.3968999981880188},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.3734000027179718}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.732200026512146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6820999979972839},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6371999979019165},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.6057000160217285},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.531499981880188},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5024999976158142},{"id":"https://openalex.org/C140631703","wikidata":"https://www.wikidata.org/wiki/Q34678","display_name":"Stereophonic sound","level":3,"score":0.40230000019073486},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4016000032424927},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3968999981880188},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3734000027179718},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.3395000100135803},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.33410000801086426},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C128422554","wikidata":"https://www.wikidata.org/wiki/Q20077126","display_name":"Sound recording and reproduction","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.2727000117301941},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C2778739407","wikidata":"https://www.wikidata.org/wiki/Q165372","display_name":"CLIPS","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.25529998540878296},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.15559774","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15559774","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.15559774","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15559774","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":1,"referenced_works":["https://openalex.org/W4393661452"],"related_works":[],"abstract_inverted_index":{"Description":[0],"The":[1,19,66,96,256,270,279,378,444,493,697,712,894,943,966,983,1018,1052,1069,1138,1150,1172,1205,1284,1315],"DCASE2025":[2,64,112,379,469,1130,1160,1234,1264],"Task3":[3,113,380,470,1161],"Stereo":[4,114,366,381,471,861,1162],"SELD":[5,115,382,472,1140,1156,1163,1170],"Dataset":[6,383,473,1164],"is":[7,231,384,514,621,638,648,658,706,718,804,972,986,1006,1114,1146],"a":[8,91,159,185,196,517,522,540,747,787,1102,1175,1199],"stereo":[9,34,151,182,480,497,869,1139,1155,1190,1289,1320],"audio":[10,25,35,145,152,180,481,488,498,664,944,1067,1070,1182,1191,1290,1321],"and":[11,26,36,50,58,78,83,101,122,146,153,216,322,370,388,408,413,440,451,462,483,489,499,531,551,605,616,682,725,980,1015,1023,1040,1054,1062,1109,1134,1178,1203,1217,1219,1238,1244,1341],"video":[12,28,38,98,148,155,194,198,218,500,1053,1185,1194],"dataset":[13,52,68,418,445,501,949,990,1075,1145,1212],"derived":[14,761,774,793],"from":[15,125,134,263,762,775,794,1165],"the":[16,48,54,63,105,111,119,126,135,142,164,171,177,192,200,205,209,217,240,252,284,287,292,299,304,307,312,417,449,454,468,496,506,549,573,654,677,709,737,758,771,810,819,829,839,849,898,947,951,970,978,989,996,1011,1030,1033,1037,1058,1066,1073,1083,1087,1090,1122,1129,1159,1211,1220,1227,1233,1249,1261,1275,1293,1299,1303,1309,1312,1324,1330,1334,1338],"STARSS23":[17,67,127,137,288,710],"dataset.":[18,128,812,1314,1336],"original":[20,136,241],"STARSS23's":[21,242],"first-order":[22],"Ambisonics":[23],"(FOA)":[24],"360\u00b0":[27,97,147,652,1184],"data":[29,156,482,726,1141,1151,1277,1291,1322],"have":[30,1057,1268],"been":[31],"converted":[32],"to":[33,90,149,158,163,181,195,213,221,246,265,276,422,476,583,597,627,675,708,975,992,1028,1188,1198,1236,1256,1288,1319,1347],"perspective":[37,154,197,308,1193,1300,1331],"data,":[39],"simulating":[40],"regular":[41],"media":[42,235,490],"content.":[43,236],"These":[44],"clips":[45,133,686,742,759,772,782,1078,1216,1222],"serve":[46,1120],"as":[47,204,286,435,578,1065,1093,1121,1278],"development":[49,811,948,1214,1294,1304,1313],"evaluation":[51,389,447,1074,1204,1221,1251,1276,1325,1335],"for":[53,315,386,393,457,536,558,806,818,828,838,848,988,1143,1248,1292,1302,1311,1323,1333],"sound":[55,73,394,399,410,528,692,889],"event":[56,295,395,411,885,890],"localization":[57,401,412,438],"detection":[59,396,414],"(SELD)":[60],"task":[61],"of":[62,72,86,93,228,390,448,453,460,495,516,526,572,581,593,631,730,743,750,757,770,783,790,998,1032,1077,1101,1128,1210,1229,1232,1263],"Challenge.":[65],"contains":[69,1298,1308,1329],"multichannel":[70],"recordings":[71,550,665,738,763,776,795],"scenes":[74],"in":[75,234,306,318,486,505,521,548,736,764,777,796,815,825,835,845,946,961,964,1072,1089,1125],"various":[76],"rooms":[77,733,814,824,834,844],"environments,":[79],"together":[80],"with":[81,104,199,224,303,372,402,623,666,746,786,866,1111],"temporal":[82],"spatial":[84],"annotations":[85,557],"prominent":[87],"events":[88,561,570,586,595,693],"belonging":[89],"set":[92,208,679],"target":[94,544,574,691,888],"classes.":[95],"are":[99,260,273,282,534,546,562,576,587,602,672,760,773,817,827,837,847,1026,1043],"spatially":[100],"temporally":[102],"aligned":[103],"microphone":[106],"array":[107],"recordings.":[108,138],"To":[109,290],"construct":[110,1154],"Dataset,":[116],"we":[117,140,168,175,297],"conduct":[118],"following":[120],"sampling":[121,608,705],"conversion":[123],"procedures":[124],"We":[129,190,207,237],"first":[130,169],"sample":[131],"5-second":[132,143],"Then,":[139,174],"convert":[141,176,191],"FOA":[144,172,179,1169,1181],"generate":[150],"corresponding":[157],"fixed":[160,165,253],"point-of-view.":[161],"According":[162],"viewing":[166,202,254,617,646,656,1013,1021,1038,1200],"angle,":[167,1014,1022],"rotate":[170,239],"audio.":[173,206],"rotated":[178],"audio,":[183],"emulating":[184],"mid-side":[186,863],"(M/S)":[187,864],"recording":[188,512,518,620,1019],"technique.":[189],"equirectangular":[193],"same":[201,285,678,1059],"angle":[203,647,657,960,1039],"horizontal":[210,645],"field-of-view":[211],"(FOV)":[212],"100":[214],"degrees":[215],"resolution":[219],"(Width:Height)":[220],"640:360":[222],"pixels,":[223],"an":[225,565],"aspect":[226],"ratio":[227],"16:9,":[229],"which":[230],"widely":[232],"used":[233,421,973,1225],"also":[238,1147],"direction-of-arrival":[243],"(DOA)":[244],"labels":[245,249,259,272,281,302,1218],"new":[247,257,300,1189],"DOA":[248,301],"centered":[250],"at":[251,651,660,969,1046],"angle.":[255,618,1201],"azimuth":[258],"folded":[261],"back":[262,264],"front,":[266],"considering":[267],"front-back":[268],"ambiguity.":[269,278],"elevation":[271],"omitted":[274],"due":[275],"top-bottom":[277],"distance":[280],"kept":[283,659],"one.":[289,711],"get":[291],"binary":[293],"onscreen/offscreen":[294,713],"labels,":[296,1224],"compare":[298,1257],"FOV":[305],"video.":[309],"Please":[310],"check":[311],"challenge":[313,1250],"webpage":[314],"details":[316],"missing":[317,667],"this":[319,327,332,1112,1144],"description.":[320],"Report":[321],"reference":[323],"If":[324,1253],"you":[325,329],"use":[326,1274,1342],"dataset,":[328,1113],"could":[330],"cite":[331],"report":[333],"on":[334,432,1082,1086],"its":[335,612,1180],"construction":[336],"process:":[337],"Kazuki":[338],"Shimada,":[339],"Archontis":[340],"Politis,":[341],"Iran":[342],"R.":[343],"Roman,":[344],"Parthasaarathy":[345],"Sudarsanam,":[346],"David":[347],"Diaz-Guerra,":[348],"Ruchi":[349],"Pandey,":[350],"Kengo":[351],"Uchida,":[352],"Yuichiro":[353],"Koyama,":[354],"Naoya":[355],"Takahashi,":[356,360],"Takashi":[357],"Shibuya,":[358],"Shusuke":[359],"Tuomas":[361],"Virtanen,":[362],"Yuki":[363],"Mitsufuji":[364],"(2025).":[365],"Sound":[367,569,884],"Event":[368],"Localization":[369],"Detection":[371],"Onscreen/offscreen":[373],"Classification,":[374],"found":[375],"here.":[376,1116,1149],"Aim":[377],"suitable":[385],"training":[387,820,840,979],"machine-listening":[391],"models":[392,478],"(SED),":[397],"general":[398],"source":[400,437],"diverse":[403,458,464],"sounds":[404,461,919],"or":[405,926,1085,1167],"signal-of-interest":[406],"localization,":[407],"joint":[409,1106],"(SELD).":[415],"Additionally,":[416],"can":[419,502,599,1153],"be":[420,503],"evaluate":[423,477],"signal":[424],"processing":[425],"methods":[426,439],"that":[427,1036],"do":[428,687],"not":[429,673,688,1044],"necessarily":[430],"rely":[431],"training,":[433],"such":[434],"acoustic":[436,442,465],"multiple-source":[441],"tracking.":[443],"allows":[446,474],"performance":[450,997],"robustness":[452],"aforementioned":[455],"applications":[456],"types":[459],"under":[463,1132],"conditions.":[466,1003],"Specifically,":[467],"us":[475],"using":[479,809],"explore":[484],"tasks":[485],"common":[487],"scenarios.":[491],"Specifications":[492],"specifications":[494],"summarized":[504],"following:":[507],"Recording":[508],"(STARSS22/23":[509],"setup):":[510],"Each":[511,607,1004],"clip":[513,1005,1176],"part":[515],"session":[519,538],"happening":[520],"unique":[523,535,732],"room.":[524],"Groups":[525],"participants,":[527],"making":[529],"props,":[530],"scene":[532],"scenarios":[533],"each":[537,642],"(with":[539],"few":[541],"exceptions).":[542],"13":[543,887],"classes":[545,575,886,891,895],"identified":[547],"strongly":[552],"annotated":[553],"by":[554,564,1008,1226],"humans.":[555],"Spatial":[556],"those":[559],"active":[560],"captured":[563,735],"optical":[566],"tracking":[567],"system.":[568],"out":[571],"considered":[577],"interference.":[579],"Occurrences":[580],"up":[582],"3":[584,1127,1231],"simultaneous":[585],"fairly":[588],"common,":[589],"while":[590,653],"higher":[591],"numbers":[592],"overlapping":[594],"(up":[596],"6)":[598],"occur":[600],"but":[601,1049],"rare.":[603],"Sampling":[604,854],"conversion:":[606],"step":[609],"randomly":[610,1009,1177],"selects":[611],"recording,":[613,1012],"start":[614,636,1016,1024,1041],"frame,":[615],"A":[619,635,644,728,801],"selected":[622,639,649,674],"length-weighted":[624],"random":[625,695,704],"choice":[626],"treat":[628],"all":[629,632,701,716],"frames":[630,702,717,879],"files":[633,945,1056,1071,1340],"equally.":[634],"frame":[637],"uniformly":[640,650],"within":[641],"recording.":[643],"vertical":[655],"0\u00b0":[661],"elevation.":[662],"12":[663],"videos":[668,1301,1332],"(fold3_room21_mix001.wav":[669],"-":[670],"fold3_room21_mix012.wav)":[671],"keep":[676],"between":[680,977],"audio-only":[681,1133],"audiovisual":[683,1135],"tracks.":[684,1137],"Several":[685],"contain":[689],"any":[690,1080],"after":[694,703],"sampling.":[696],"class":[698],"distribution":[699,714],"across":[700,715],"similar":[707],"around":[719],"1":[720],":":[721],"3.":[722,913],"Volume,":[723],"duration,":[724,745,785],"split:":[727],"total":[729,748,788],"16":[731,859],"were":[734,892],"(development":[739,753,766,779],"set).":[740],"30,000":[741],"5-sec":[744,784],"time":[749,789,963,1025,1042],"41.7":[751],"hrs":[752],"dataset).":[754,767,780,800],"23.9":[755],"%":[756,769],"Tokyo":[765,816,826],"76.1":[768],"Tampere":[778,836,846],"10,000":[781],"13.9":[791],"hrs,":[792],"both":[797],"sites":[798],"(evaluation":[799],"training-testing":[802],"split":[803,821,831,841,851],"provided":[805,987,1027,1115],"reporting":[807],"results":[808,1271],"2":[813,823],"(dev-train-sony).":[822],"testing":[830,850,981,1280],"(dev-test-sony).":[832],"7":[833],"(dev-train-tau).":[842],"5":[843],"(dev-test-tau).":[852],"Audio:":[853],"rate:":[855],"24kHz.":[856],"Bit":[857],"depth:":[858],"bits.":[860],"format:":[862,873],"technique":[865],"left-right":[867],"cardioid":[868],"patterns.":[870],"Video:":[871],"Video":[872,875,878],"perspective.":[874],"resolution:":[876],"640x360.":[877],"per":[880,958],"second":[881],"(fps):":[882],"29.97.":[883],"annotated.":[893],"follow":[896,950],"loosely":[897],"AudioSet":[899],"ontology.":[900],"0.":[901],"Female":[902],"speech,":[903,908],"woman":[904],"speaking":[905,910],"1.":[906],"Male":[907],"man":[909],"2.":[911],"Clapping":[912],"Telephone":[914],"4.":[915],"Laughter":[916],"5.":[917],"Domestic":[918],"6.":[920],"Walk,":[921],"footsteps":[922],"7.":[923],"Door,":[924],"open":[925],"close":[927],"8.":[928],"Music":[929],"9.":[930],"Musical":[931],"instrument":[932],"10.":[933],"Water":[934],"tap,":[935],"faucet":[936],"11.":[937],"Bell":[938],"12.":[939],"Knock":[940],"Naming":[941],"convention":[942,1064],"naming":[952,1063,1091],"convention:":[953],"fold[fold":[954],"number]_room[room":[955],"number]_mix[recording":[956],"number":[957,968],"room]_deg[viewing":[959],"degree]_start[start":[962],"frame].wav":[965],"fold":[967],"moment":[971],"only":[974],"distinguish":[976],"split.":[982],"room":[984],"information":[985,1081],"user":[991],"potentially":[993],"help":[994],"understand":[995],"their":[999,1240,1258,1279],"method":[1000,1124],"concerning":[1001],"different":[1002],"generated":[1007],"selecting":[1010],"time.":[1017],"number,":[1020],"indicate":[1029],"configuration":[1031],"clip.":[1034],"Note":[1035],"sampled":[1045,1050],"equal":[1047],"intervals":[1048],"randomly.":[1051],"metadata":[1055,1187,1196,1310],"folder":[1060],"structure":[1061],"files.":[1068,1351],"consists":[1076],"without":[1079,1223],"origin":[1084],"location":[1088],"convention,":[1092],"below:":[1094],"sample[clip":[1095],"number].wav":[1096],"Example":[1097],"application":[1098],"An":[1099],"implementation":[1100,1118],"trainable":[1103],"model":[1104],"performing":[1105],"SELD,":[1107],"trained":[1108],"evaluated":[1110],"This":[1117],"will":[1119,1267],"baseline":[1123],"Task":[1126,1230],"Challenge":[1131,1235],"inference":[1136],"generator":[1142,1152,1173],"available":[1148],"datasets":[1157],"like":[1158],"real":[1166],"synthetic":[1168],"datasets.":[1171],"samples":[1174],"converts":[1179],"/":[1183,1186,1192,1195],"according":[1197],"Development":[1202],"current":[1206],"version":[1207],"(Version":[1208],"1.1.0)":[1209],"includes":[1213],"audio/video":[1215],"participants":[1228],"train":[1237],"validate":[1239],"submitted":[1241],"systems":[1242],"(development),":[1243],"produce":[1245],"system":[1246,1259],"outputs":[1247],"phase.":[1252],"researchers":[1254],"wish":[1255],"against":[1260],"submissions":[1262],"Challenge,":[1265],"they":[1266,1273],"directly":[1269],"comparable":[1270],"if":[1272],"set.":[1281],"Download":[1282,1337],"instruction":[1283],"file":[1285,1296,1306,1316,1327],"stereo_dev.zip":[1286],"corresponds":[1287,1318],"dataset.The":[1295,1305,1326],"video_dev.zip":[1297],"metadata_dev.zip":[1307],"stereo_eval.zip":[1317],"video_eval.zip":[1328],"zip":[1339,1350],"your":[1343],"favorite":[1344],"compression":[1345],"tool":[1346],"unzip":[1348],"these":[1349]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
