{"id":"https://openalex.org/W4312756816","doi":"https://doi.org/10.1109/mmsp55362.2022.9949447","title":"Audio-visual scene classification via contrastive event-object alignment and semantic-based fusion","display_name":"Audio-visual scene classification via contrastive event-object alignment and semantic-based fusion","publication_year":2022,"publication_date":"2022-09-26","ids":{"openalex":"https://openalex.org/W4312756816","doi":"https://doi.org/10.1109/mmsp55362.2022.9949447"},"language":"en","primary_location":{"id":"doi:10.1109/mmsp55362.2022.9949447","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp55362.2022.9949447","pdf_url":null,"source":{"id":"https://openalex.org/S4363605768","display_name":"2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://biblio.ugent.be/publication/01GMBADMAFC9JC0NFKCCNCS52C/file/01GMBAFK53Z2GNMWXRV711WPY5.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063248625","display_name":"Yuanbo Hou","orcid":"https://orcid.org/0000-0001-8469-5740"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Yuanbo Hou","raw_affiliation_strings":["Ghent University,WAVES Research Group,Gent,Belgium","WAVES Research Group, Ghent University, Gent, Belgium"],"affiliations":[{"raw_affiliation_string":"Ghent University,WAVES Research Group,Gent,Belgium","institution_ids":["https://openalex.org/I32597200"]},{"raw_affiliation_string":"WAVES Research Group, Ghent University, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090926391","display_name":"Bo Kang","orcid":"https://orcid.org/0000-0002-9895-9927"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]},{"id":"https://openalex.org/I12607205","display_name":"University College Ghent","ror":"https://ror.org/00rs45z86","country_code":"BE","type":"education","lineage":["https://openalex.org/I12607205"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Bo Kang","raw_affiliation_strings":["Ghent University,IDLAB,Gent,Belgium","IDLAB, Ghent University, Gent, Belgium"],"affiliations":[{"raw_affiliation_string":"Ghent University,IDLAB,Gent,Belgium","institution_ids":["https://openalex.org/I32597200","https://openalex.org/I12607205"]},{"raw_affiliation_string":"IDLAB, Ghent University, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069519911","display_name":"Dick Botteldooren","orcid":"https://orcid.org/0000-0002-7756-7238"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Dick Botteldooren","raw_affiliation_strings":["Ghent University,WAVES Research Group,Gent,Belgium","WAVES Research Group, Ghent University, Gent, Belgium"],"affiliations":[{"raw_affiliation_string":"Ghent University,WAVES Research Group,Gent,Belgium","institution_ids":["https://openalex.org/I32597200"]},{"raw_affiliation_string":"WAVES Research Group, Ghent University, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5063248625"],"corresponding_institution_ids":["https://openalex.org/I32597200"],"apc_list":null,"apc_paid":null,"fwci":0.3677,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.53554502,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.9383999705314636,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7779359221458435},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.7599996328353882},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6214956045150757},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5593368411064148},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5232502222061157},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5019805431365967},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.44613367319107056},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.42613011598587036},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.13059794902801514}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7779359221458435},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.7599996328353882},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6214956045150757},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5593368411064148},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5232502222061157},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5019805431365967},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.44613367319107056},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.42613011598587036},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.13059794902801514},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/mmsp55362.2022.9949447","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp55362.2022.9949447","pdf_url":null,"source":{"id":"https://openalex.org/S4363605768","display_name":"2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:archive.ugent.be:01GMBADMAFC9JC0NFKCCNCS52C","is_oa":true,"landing_page_url":"http://hdl.handle.net/1854/LU-01GMBADMAFC9JC0NFKCCNCS52C","pdf_url":"https://biblio.ugent.be/publication/01GMBADMAFC9JC0NFKCCNCS52C/file/01GMBAFK53Z2GNMWXRV711WPY5.pdf","source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISBN: 9781665471893","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:archive.ugent.be:01GMBADMAFC9JC0NFKCCNCS52C","is_oa":true,"landing_page_url":"http://hdl.handle.net/1854/LU-01GMBADMAFC9JC0NFKCCNCS52C","pdf_url":"https://biblio.ugent.be/publication/01GMBADMAFC9JC0NFKCCNCS52C/file/01GMBAFK53Z2GNMWXRV711WPY5.pdf","source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISBN: 9781665471893","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4312756816.pdf"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W2071524685","https://openalex.org/W2095705004","https://openalex.org/W2103235956","https://openalex.org/W2108598243","https://openalex.org/W2133273100","https://openalex.org/W2140647972","https://openalex.org/W2219322027","https://openalex.org/W2467206671","https://openalex.org/W2551397753","https://openalex.org/W2566935005","https://openalex.org/W2592962403","https://openalex.org/W2593116425","https://openalex.org/W2755125693","https://openalex.org/W2886877375","https://openalex.org/W2889326414","https://openalex.org/W2894973873","https://openalex.org/W2908510526","https://openalex.org/W3160506022","https://openalex.org/W3161541317","https://openalex.org/W3161753238","https://openalex.org/W3162391496","https://openalex.org/W3164791629","https://openalex.org/W3166396011","https://openalex.org/W3170874841","https://openalex.org/W3185617735","https://openalex.org/W3196974791","https://openalex.org/W4226346350","https://openalex.org/W4312443924","https://openalex.org/W4312445970","https://openalex.org/W4385245566","https://openalex.org/W6674330103","https://openalex.org/W6676297131","https://openalex.org/W6720039734","https://openalex.org/W6730105927","https://openalex.org/W6734260513","https://openalex.org/W6739901393","https://openalex.org/W6753847621","https://openalex.org/W6788135285","https://openalex.org/W6791353385","https://openalex.org/W6792861227","https://openalex.org/W6795710243","https://openalex.org/W6885196408"],"related_works":["https://openalex.org/W3000097931","https://openalex.org/W2354322770","https://openalex.org/W4237547500","https://openalex.org/W1570848052","https://openalex.org/W2373192430","https://openalex.org/W4239268388","https://openalex.org/W4243305035","https://openalex.org/W1537496349","https://openalex.org/W2379407973","https://openalex.org/W2350267540"],"abstract_inverted_index":{"Previous":[0],"works":[1],"on":[2,8,24,37,230],"scene":[3,26,86],"classification":[4,27],"are":[5,174,209],"mainly":[6],"based":[7],"audio":[9,32,49,72,89,106,119,150,169,224],"or":[10,55],"visual":[11,53,75,92,109,122,153,164,228],"signals,":[12],"while":[13,81],"humans":[14,82],"perceive":[15],"the":[16,30,38,44,48,52,57,64,70,96,102,115,146,157,188,198,206,212,221,235,247],"environmental":[17],"scenes":[18,79],"through":[19,87],"multiple":[20],"senses.":[21],"Recent":[22],"studies":[23],"audio-visual":[25,78,160,207],"separately":[28],"fine-tune":[29],"large-scale":[31,244],"and":[33,51,74,91,95,108,113,121,136,152,171,178,195,202,226,234,258],"image":[34],"pre-trained":[35],"models":[36,62],"target":[39],"dataset,":[40],"then":[41],"either":[42],"fuse":[43,56],"intermediate":[45],"representations":[46],"of":[47,60,105,149,200,223],"model":[50,129,191,249],"model,":[54],"coarse-grained":[58],"decision":[59],"both":[61,88],"at":[63],"clip":[65],"level.":[66],"Such":[67],"methods":[68],"ignore":[69],"detailed":[71],"events":[73,90,107,120,151,170,225],"objects":[76,93,110,154,165,229],"in":[77,111],"(AVS),":[80],"often":[83],"identify":[84],"a":[85,127,215,231],"within,":[94],"congruence":[97],"between":[98,118,159],"them.":[99],"To":[100],"exploit":[101],"fine-grained":[103,232],"information":[104],"AVS,":[112],"coordinate":[114],"implicit":[116],"relationship":[117],"objects,":[123],"this":[124],"paper":[125],"proposes":[126],"multi-branch":[128],"equipped":[130,192],"with":[131,167,193,242],"contrastive":[132],"event-object":[133,161],"alignment":[134],"(CEOA)":[135],"semantic-based":[137],"fusion":[138],"(SF)":[139],"for":[140,181],"AVSC.":[141],"CEOA":[142,194,219],"aims":[143],"to":[144],"align":[145],"learned":[147],"embeddings":[148,222],"by":[155,176],"comparing":[156],"difference":[158],"pairs.":[162],"Then,":[163],"associated":[166],"certain":[168],"vice":[172],"versa":[173],"accentuated":[175],"cross-attention":[177],"undergo":[179],"SF":[180,196,236],"semantic-level":[182],"fusion.":[183],"Experiments":[184],"show":[185],"that:":[186],"1)":[187],"proposed":[189,248],"AVSC":[190],"outperforms":[197],"results":[199,208,213],"audio-only":[201],"visual-only":[203],"models,":[204],"i.e.,":[205],"better":[210],"than":[211],"from":[214],"single":[216],"modality.":[217],"2)":[218],"aligns":[220],"related":[227],"level,":[233],"effectively":[237],"integrates":[238],"both;":[239],"3)":[240],"Compared":[241],"other":[243],"integrated":[245],"systems,":[246],"shows":[250],"competitive":[251],"performance,":[252],"even":[253],"without":[254],"using":[255],"additional":[256],"datasets":[257],"data":[259],"augmentation":[260],"tricks.":[261]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
