{"id":"https://openalex.org/W4387448621","doi":"https://doi.org/10.1109/taslp.2023.3323135","title":"Cooperative Scene-Event Modelling for Acoustic Scene Classification","display_name":"Cooperative Scene-Event Modelling for Acoustic Scene Classification","publication_year":2023,"publication_date":"2023-10-09","ids":{"openalex":"https://openalex.org/W4387448621","doi":"https://doi.org/10.1109/taslp.2023.3323135"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3323135","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3323135","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/6633080/10274856.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://ieeexplore.ieee.org/ielx7/6570655/6633080/10274856.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063248625","display_name":"Yuanbo Hou","orcid":"https://orcid.org/0000-0001-8469-5740"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Yuanbo Hou","raw_affiliation_strings":["WAVES Research Group, Ghent University, Gent, Belgium","Department of Information Technology, Ghent University Faculty of Engineering and Architecture, Gent, Belgium"],"affiliations":[{"raw_affiliation_string":"WAVES Research Group, Ghent University, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]},{"raw_affiliation_string":"Department of Information Technology, Ghent University Faculty of Engineering and Architecture, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090926391","display_name":"Bo Kang","orcid":"https://orcid.org/0000-0002-9895-9927"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Bo Kang","raw_affiliation_strings":["IDLab, Ghent University, Gent, Belgium","Electronics and Information Systems, Ghent University Faculty of Engineering and Architecture, Gent, Belgium"],"affiliations":[{"raw_affiliation_string":"IDLab, Ghent University, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]},{"raw_affiliation_string":"Electronics and Information Systems, Ghent University Faculty of Engineering and Architecture, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013266566","display_name":"Andrew Mitchell","orcid":"https://orcid.org/0000-0003-0978-5046"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrew Mitchell","raw_affiliation_strings":["Institute for Environmental Design and Engineering, The Bartlett, University College London, London, U.K","Institute for Environmental Design and Engineering, University College London Bartlett Faculty of the Built Environment, London, U.K"],"affiliations":[{"raw_affiliation_string":"Institute for Environmental Design and Engineering, The Bartlett, University College London, London, U.K","institution_ids":["https://openalex.org/I45129253","https://openalex.org/I166337079"]},{"raw_affiliation_string":"Institute for Environmental Design and Engineering, University College London Bartlett Faculty of the Built Environment, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100676721","display_name":"Wenwu Wang","orcid":"https://orcid.org/0000-0002-8393-5703"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wenwu Wang","raw_affiliation_strings":["Centre for Vision, Speech, and Signal Processing, University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech, and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100447381","display_name":"Jian Kang","orcid":"https://orcid.org/0000-0001-8995-5636"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jian Kang","raw_affiliation_strings":["Institute for Environmental Design and Engineering, The Bartlett, University College London, London, U.K","Institute for Environmental Design and Engineering, University College London Bartlett Faculty of the Built Environment, London, U.K"],"affiliations":[{"raw_affiliation_string":"Institute for Environmental Design and Engineering, The Bartlett, University College London, London, U.K","institution_ids":["https://openalex.org/I45129253","https://openalex.org/I166337079"]},{"raw_affiliation_string":"Institute for Environmental Design and Engineering, University College London Bartlett Faculty of the Built Environment, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069519911","display_name":"Dick Botteldooren","orcid":"https://orcid.org/0000-0002-7756-7238"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Dick Botteldooren","raw_affiliation_strings":["WAVES Research Group, Ghent University, Gent, Belgium","Department of Information Technology, Ghent University Faculty of Engineering and Architecture, Gent, Belgium"],"affiliations":[{"raw_affiliation_string":"WAVES Research Group, Ghent University, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]},{"raw_affiliation_string":"Department of Information Technology, Ghent University Faculty of Engineering and Architecture, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5063248625"],"corresponding_institution_ids":["https://openalex.org/I32597200"],"apc_list":null,"apc_paid":null,"fwci":2.3649,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.89774836,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"68","last_page":"82"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9769999980926514,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7953369617462158},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6948140859603882},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.521388590335846},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5119739770889282},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4825019836425781},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.41960638761520386},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4180357754230499},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4121685028076172},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35731297731399536},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.24809911847114563}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7953369617462158},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6948140859603882},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.521388590335846},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5119739770889282},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4825019836425781},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.41960638761520386},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4180357754230499},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4121685028076172},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35731297731399536},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.24809911847114563},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2023.3323135","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3323135","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/6633080/10274856.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:archive.ugent.be:01HSDN7D520JSYPJ54D8CFVC4Q","is_oa":true,"landing_page_url":"http://hdl.handle.net/1854/LU-01HSDN7D520JSYPJ54D8CFVC4Q","pdf_url":"https://biblio.ugent.be/publication/01HSDN7D520JSYPJ54D8CFVC4Q/file/01HSDNCXCGP5ME4MD09YMR0GG7.pdf","source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN: 2329-9304","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:eprints.ucl.ac.uk.OAI2:10181262","is_oa":true,"landing_page_url":"https://discovery.ucl.ac.uk/id/eprint/10181262/","pdf_url":"https://discovery.ucl.ac.uk/id/eprint/10181262/1/Kang_VoR_Cooperative_Scene-Event_Modelling_for_Acoustic_Scene_Classification.pdf","source":{"id":"https://openalex.org/S4306400024","display_name":"UCL Discovery (University College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45129253","host_organization_name":"University College London","host_organization_lineage":["https://openalex.org/I45129253"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"   IEEE/ACM Transactions on Audio Speech and Language Processing , 32    pp. 68-82.   (2023)      ","raw_type":"Article"}],"best_oa_location":{"id":"doi:10.1109/taslp.2023.3323135","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3323135","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/6633080/10274856.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320327336","display_name":"Vlaamse regering","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387448621.pdf","grobid_xml":"https://content.openalex.org/works/W4387448621.grobid-xml"},"referenced_works_count":55,"referenced_works":["https://openalex.org/W1821462560","https://openalex.org/W2063559185","https://openalex.org/W2086384421","https://openalex.org/W2095705004","https://openalex.org/W2183341477","https://openalex.org/W2187089797","https://openalex.org/W2354870669","https://openalex.org/W2489484968","https://openalex.org/W2593116425","https://openalex.org/W2761246525","https://openalex.org/W2768707988","https://openalex.org/W2775794021","https://openalex.org/W2811482466","https://openalex.org/W2885613075","https://openalex.org/W2908441554","https://openalex.org/W2928717951","https://openalex.org/W2936774411","https://openalex.org/W2937223285","https://openalex.org/W2956637735","https://openalex.org/W2963723765","https://openalex.org/W2963877604","https://openalex.org/W2968661326","https://openalex.org/W2972581694","https://openalex.org/W2982346427","https://openalex.org/W2997308807","https://openalex.org/W3008191852","https://openalex.org/W3015420744","https://openalex.org/W3015700860","https://openalex.org/W3035546924","https://openalex.org/W3092858428","https://openalex.org/W3094550259","https://openalex.org/W3096787185","https://openalex.org/W3103314642","https://openalex.org/W3163881933","https://openalex.org/W3187295906","https://openalex.org/W3196974791","https://openalex.org/W3197459407","https://openalex.org/W3198054860","https://openalex.org/W4312445970","https://openalex.org/W4375868875","https://openalex.org/W4385245566","https://openalex.org/W4387068368","https://openalex.org/W4393792386","https://openalex.org/W6631190155","https://openalex.org/W6638523607","https://openalex.org/W6674330103","https://openalex.org/W6677770173","https://openalex.org/W6679909955","https://openalex.org/W6751795543","https://openalex.org/W6752516136","https://openalex.org/W6753847621","https://openalex.org/W6754058454","https://openalex.org/W6756850094","https://openalex.org/W6765848913","https://openalex.org/W6802938626"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2353836703"],"abstract_inverted_index":{"Acoustic":[0],"scene":[1],"classification":[2,43],"(ASC)":[3],"can":[4],"be":[5,157,187],"helpful":[6],"for":[7,11,177],"creating":[8],"context":[9],"awareness":[10],"intelligent":[12],"robots.":[13],"Humans":[14],"naturally":[15],"use":[16],"the":[17,82,100,104,110,117,126,131,135,143,161],"relations":[18],"between":[19,58,112,134,146],"acoustic":[20],"scenes":[21,59,114],"(AS)":[22],"and":[23,29,40,60,122,129,137,148,211,239,245],"audio":[24,41,55],"events":[25],"(AE)":[26],"to":[27,79,91,150,156,186],"understand":[28],"recognize":[30],"their":[31,64],"surrounding":[32],"environments.":[33],"However,":[34],"in":[35,125,221],"most":[36],"previous":[37],"works,":[38],"ASC":[39,229],"event":[42],"(AEC)":[44],"are":[45],"treated":[46],"as":[47,230],"independent":[48],"tasks,":[49],"with":[50,95,232],"a":[51,73,173],"focus":[52],"primarily":[53],"on":[54,199,209,228],"features":[56],"shared":[57],"events,":[61],"but":[62],"not":[63],"implicit":[65],"relations.":[66],"To":[67],"address":[68],"this":[69],"limitation,":[70],"we":[71],"propose":[72],"cooperative":[74,178],"scene-event":[75,84,97,181,223],"modelling":[76,98,179],"(cSEM)":[77],"framework":[78],"automatically":[80],"model":[81],"intricate":[83],"relation":[85,144],"by":[86,115],"an":[87],"adaptive":[88],"coupling":[89],"matrix":[90],"improve":[92,151],"ASC.":[93],"Compared":[94],"other":[96,233],"frameworks,":[99],"proposed":[101],"cSEM":[102,206],"offers":[103],"following":[105],"advantages.":[106],"First,":[107],"it":[108,141,171],"reduces":[109],"confusion":[111],"similar":[113],"aligning":[116],"information":[118,133,145,162],"of":[119,163,180],"coarsegrained":[120],"AS":[121,136,147],"fine-grained":[123],"AE":[124,138,149,164],"latent":[127],"space,":[128],"reducing":[130],"redundant":[132],"embeddings.":[139],"Second,":[140],"exploits":[142],"ASC,":[152],"which":[153,183],"is":[154,165,184,207,242],"shown":[155,185],"beneficial,":[158],"even":[159],"if":[160],"derived":[166],"from":[167,195],"unverified":[168],"pseudo-labels.":[169],"Third,":[170],"uses":[172],"regression-based":[174],"loss":[175,192],"function":[176],"relations,":[182],"more":[188],"effective":[189],"than":[190],"classification-based":[191],"functions.":[193],"Instantiated":[194],"four":[196],"models":[197,218],"based":[198],"either":[200],"Transformer":[201],"or":[202,235],"convolutional":[203],"neural":[204],"networks,":[205],"evaluated":[208],"real-life":[210],"synthetic":[212],"datasets.":[213],"Experiments":[214],"show":[215],"that":[216],"cSEM-based":[217],"work":[219],"well":[220],"reallife":[222],"analysis,":[224],"offering":[225],"competitive":[226],"results":[227],"compared":[231],"multi-feature":[234],"multi-model":[236],"ensemble":[237],"methods.TheASCaccuracyachievedontheTUT2018,TAU2019,":[238],"JSSED":[240],"datasets":[241],"81.0%,":[243],"88.9%":[244],"97.2%,":[246],"respectively":[247]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":6}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
