{"id":"https://openalex.org/W3204428002","doi":"https://doi.org/10.1109/icassp43922.2022.9747588","title":"Sound Event Detection Guided by Semantic Contexts of Scenes","display_name":"Sound Event Detection Guided by Semantic Contexts of Scenes","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3204428002","doi":"https://doi.org/10.1109/icassp43922.2022.9747588","mag":"3204428002"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747588","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747588","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059521640","display_name":"Noriyuki Tonami","orcid":null},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Noriyuki Tonami","raw_affiliation_strings":["Ritsumeikan University,Japan","Ritsumeikan University, Japan"],"affiliations":[{"raw_affiliation_string":"Ritsumeikan University,Japan","institution_ids":["https://openalex.org/I135768898"]},{"raw_affiliation_string":"Ritsumeikan University, Japan","institution_ids":["https://openalex.org/I135768898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009991655","display_name":"Keisuke Imoto","orcid":"https://orcid.org/0000-0002-0703-8293"},"institutions":[{"id":"https://openalex.org/I133984924","display_name":"Doshisha University","ror":"https://ror.org/01fxdkm29","country_code":"JP","type":"education","lineage":["https://openalex.org/I133984924"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keisuke Imoto","raw_affiliation_strings":["Doshisha University,Japan","Doshisha University, Japan"],"affiliations":[{"raw_affiliation_string":"Doshisha University,Japan","institution_ids":["https://openalex.org/I133984924"]},{"raw_affiliation_string":"Doshisha University, Japan","institution_ids":["https://openalex.org/I133984924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001641384","display_name":"Ryotaro Nagase","orcid":null},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryotaro Nagase","raw_affiliation_strings":["Ritsumeikan University,Japan","Ritsumeikan University, Japan"],"affiliations":[{"raw_affiliation_string":"Ritsumeikan University,Japan","institution_ids":["https://openalex.org/I135768898"]},{"raw_affiliation_string":"Ritsumeikan University, Japan","institution_ids":["https://openalex.org/I135768898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113959532","display_name":"Yuki Okamoto","orcid":null},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuki Okamoto","raw_affiliation_strings":["Ritsumeikan University,Japan","Ritsumeikan University, Japan"],"affiliations":[{"raw_affiliation_string":"Ritsumeikan University,Japan","institution_ids":["https://openalex.org/I135768898"]},{"raw_affiliation_string":"Ritsumeikan University, Japan","institution_ids":["https://openalex.org/I135768898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051293207","display_name":"Takahiro Fukumori","orcid":"https://orcid.org/0000-0002-4317-9704"},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takahiro Fukumori","raw_affiliation_strings":["Ritsumeikan University,Japan","Ritsumeikan University, Japan"],"affiliations":[{"raw_affiliation_string":"Ritsumeikan University,Japan","institution_ids":["https://openalex.org/I135768898"]},{"raw_affiliation_string":"Ritsumeikan University, Japan","institution_ids":["https://openalex.org/I135768898"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103048025","display_name":"Yoichi Yamashita","orcid":"https://orcid.org/0000-0001-5379-9686"},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoichi Yamashita","raw_affiliation_strings":["Ritsumeikan University,Japan","Ritsumeikan University, Japan"],"affiliations":[{"raw_affiliation_string":"Ritsumeikan University,Japan","institution_ids":["https://openalex.org/I135768898"]},{"raw_affiliation_string":"Ritsumeikan University, Japan","institution_ids":["https://openalex.org/I135768898"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5059521640"],"corresponding_institution_ids":["https://openalex.org/I135768898"],"apc_list":null,"apc_paid":null,"fwci":0.6129,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.6300158,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"801","last_page":"805"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7380239963531494},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6543787717819214},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5820660591125488},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5747514367103577},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5544549226760864},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.45642703771591187},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.43726468086242676},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4270651042461395},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3996388912200928},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36496850848197937},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3598165512084961}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7380239963531494},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6543787717819214},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5820660591125488},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5747514367103577},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5544549226760864},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.45642703771591187},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.43726468086242676},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4270651042461395},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3996388912200928},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36496850848197937},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3598165512084961},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747588","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747588","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1605026219","https://openalex.org/W2009504845","https://openalex.org/W2123843894","https://openalex.org/W2160335114","https://openalex.org/W2408239454","https://openalex.org/W2526050071","https://openalex.org/W2566935005","https://openalex.org/W2591013610","https://openalex.org/W2746554716","https://openalex.org/W2775505379","https://openalex.org/W2801393360","https://openalex.org/W2896457183","https://openalex.org/W2897802972","https://openalex.org/W2972581694","https://openalex.org/W2997892289","https://openalex.org/W3015420744","https://openalex.org/W3015700860","https://openalex.org/W3016195076","https://openalex.org/W3049446265","https://openalex.org/W3085893206","https://openalex.org/W3092858428","https://openalex.org/W3092862347","https://openalex.org/W3096579837","https://openalex.org/W3100197791","https://openalex.org/W3121952123","https://openalex.org/W3138521398","https://openalex.org/W3160950290","https://openalex.org/W3161638288","https://openalex.org/W3193321031","https://openalex.org/W4289329167","https://openalex.org/W4320930577","https://openalex.org/W6636246373","https://openalex.org/W6746023985","https://openalex.org/W6747331233","https://openalex.org/W6753516609","https://openalex.org/W6755207826","https://openalex.org/W6783078499"],"related_works":["https://openalex.org/W2055243143","https://openalex.org/W2062195135","https://openalex.org/W4321636575","https://openalex.org/W2357796999","https://openalex.org/W2045526782","https://openalex.org/W2741131631","https://openalex.org/W1986418932","https://openalex.org/W2156919374","https://openalex.org/W35446969","https://openalex.org/W1984019423"],"abstract_inverted_index":{"Some":[0],"studies":[1],"have":[2],"revealed":[3],"that":[4,143],"contexts":[5,44,64,80,105,123],"of":[6,35,57,106,121],"scenes":[7,29,59,107],"(e.g.,":[8],"\"home,\"":[9],"\"office,\"":[10],"and":[11,22,136,149,154,162],"\"cooking\")":[12],"are":[13,60,81,95],"advantageous":[14],"for":[15,30,65,83,126],"sound":[16],"event":[17],"detection":[18],"(SED).":[19],"Mobile":[20],"devices":[21],"sensing":[23],"technologies":[24],"give":[25],"useful":[26,125],"information":[27],"on":[28],"SED":[31,76,99],"without":[32],"the":[33,88,114,118,144],"use":[34],"acoustic":[36],"signals.":[37],"However,":[38],"conventional":[39,67,160],"methods":[40],"can":[41],"employ":[42,102],"pre-defined":[43,58,78],"in":[45,108],"inference":[46,109],"stages":[47],"but":[48],"not":[49],"undefined":[50],"contexts.":[51],"This":[52],"is":[53,124],"because":[54],"one-hot":[55],"representations":[56],"exploited":[61],"as":[62],"prior":[63],"such":[66],"methods.":[68],"To":[69],"alleviate":[70],"this":[71],"problem,":[72],"we":[73,112],"propose":[74],"scene-informed":[75],"where":[77],"scene-agnostic":[79],"available":[82],"more":[84],"accurate":[85],"SED.":[86,127],"In":[87],"proposed":[89,145],"method,":[90],"pre-trained":[91],"large-scale":[92],"language":[93],"models":[94,100],"utilized,":[96],"which":[97,117],"enables":[98],"to":[101,116],"unseen":[103],"semantic":[104,119],"stages.":[110],"Moreover,":[111],"investigated":[113],"extent":[115],"representation":[120],"scene":[122],"Experimental":[128],"results":[129],"performed":[130],"with":[131,159],"TUT":[132,137],"Sound":[133],"Events":[134],"2016/2017":[135,140],"Acoustic":[138],"Scenes":[139],"datasets":[141],"show":[142],"method":[146],"improves":[147],"micro":[148],"macro":[150],"F-scores":[151],"by":[152],"4.34":[153],"3.13":[155],"percentage":[156],"points":[157],"compared":[158],"Conformer-":[161],"CNN\u2013":[163],"BiGRU-based":[164],"SED,":[165],"respectively.":[166]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
