{"id":"https://openalex.org/W4415536790","doi":"https://doi.org/10.1145/3746027.3754966","title":"Discrepancy-Aware Attention Network for Enhanced Audio-Visual Generalized Zero-Shot Learning","display_name":"Discrepancy-Aware Attention Network for Enhanced Audio-Visual Generalized Zero-Shot Learning","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415536790","doi":"https://doi.org/10.1145/3746027.3754966"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3754966","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754966","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108562061","display_name":"Ruihan Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Runlin Yu","raw_affiliation_strings":["Central South University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Central South University, Changsha, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113745312","display_name":"Yuyan Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yipu Gong","raw_affiliation_strings":["Central South University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Central South University, Changsha, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046442204","display_name":"Wenrui Li","orcid":"https://orcid.org/0000-0002-2393-9016"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenrui Li","raw_affiliation_strings":["Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120131339","display_name":"Aiwen Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Aiwen Sun","raw_affiliation_strings":["Central South University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Central South University, Changsha, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021244872","display_name":"Meng Cong Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengren Zheng","raw_affiliation_strings":["Chong Qing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"Chong Qing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5108562061"],"corresponding_institution_ids":["https://openalex.org/I139660479"],"apc_list":null,"apc_paid":null,"fwci":1.4872,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86653572,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1112","last_page":"1121"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9860000014305115,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9860000014305115,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9539999961853027,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7522000074386597},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6754999756813049},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6488999724388123},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5986999869346619},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4560000002384186},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.43459999561309814},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4198000133037567},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.3801000118255615}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7522000074386597},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7141000032424927},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6754999756813049},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6488999724388123},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6359999775886536},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5986999869346619},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5170999765396118},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4560000002384186},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.43459999561309814},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4198000133037567},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3801000118255615},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.37400001287460327},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3686000108718872},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3546000123023987},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3424000144004822},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.33149999380111694},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.25619998574256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3754966","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754966","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1927052826","https://openalex.org/W2910453440","https://openalex.org/W2963499153","https://openalex.org/W3015371781","https://openalex.org/W3025520547","https://openalex.org/W3035333188","https://openalex.org/W3113673510","https://openalex.org/W3138521398","https://openalex.org/W3196591432","https://openalex.org/W3207833857","https://openalex.org/W4206239741","https://openalex.org/W4304091961","https://openalex.org/W4313477296","https://openalex.org/W4385488559","https://openalex.org/W4386076642","https://openalex.org/W4386159996","https://openalex.org/W4387831678","https://openalex.org/W4387968058","https://openalex.org/W4387968071","https://openalex.org/W4392796720","https://openalex.org/W4392970297","https://openalex.org/W4393147886","https://openalex.org/W4396214409","https://openalex.org/W4400651929","https://openalex.org/W4400903918","https://openalex.org/W4401794499","https://openalex.org/W4402660166","https://openalex.org/W4402832821","https://openalex.org/W4402917184","https://openalex.org/W4403780846","https://openalex.org/W4403791807","https://openalex.org/W4403904291","https://openalex.org/W4406552230","https://openalex.org/W4406860152","https://openalex.org/W4407632488","https://openalex.org/W4408100294","https://openalex.org/W4413158308","https://openalex.org/W4414110709"],"related_works":[],"abstract_inverted_index":{"Audio-visual":[0],"Generalized":[1],"Zero-Shot":[2],"Learning":[3],"((G)ZSL)":[4],"has":[5],"attracted":[6],"significant":[7,83],"attention":[8],"for":[9,36,65,96,145],"its":[10],"ability":[11],"to":[12,27,44,109,127],"identify":[13],"unseen":[14,37],"classes":[15],"in":[16,24,117,150],"general":[17],"video":[18],"classification":[19],"tasks.":[20],"However,":[21],"modality":[22,81,137],"imbalance":[23],"(G)ZSL":[25],"leads":[26],"over-reliance":[28],"on":[29,158],"the":[30,66,73,79,165],"optimal":[31],"modality,":[32],"reducing":[33],"discriminative":[34],"capabilities":[35],"classes.":[38],"Though":[39],"recent":[40],"studies":[41,163],"have":[42],"attempted":[43],"address":[45,86],"this":[46],"issue,":[47],"two":[48],"challenges":[49],"still":[50],"remain":[51],"unsolved:":[52],"(a)":[53],"Quality":[54],"discrepancies,":[55,71],"where":[56,72],"modalities":[57,118],"offer":[58],"differing":[59],"quantities":[60],"and":[61,119,131,142],"qualities":[62],"of":[63,75,167],"information":[64,116],"same":[67,80],"concept.":[68],"(b)":[69],"Content":[70],"contributions":[74,138],"different":[76],"samples":[77],"within":[78],"exhibit":[82],"differences.":[84],"To":[85],"these":[87],"challenges,":[88],"we":[89],"propose":[90],"a":[91,103,120],"Discrepancy-Aware":[92],"Attention":[93,106],"Network":[94],"(DAAN)":[95],"Enhanced":[97],"Audio-Visual":[98],"(G)ZSL.":[99],"Our":[100],"approach":[101],"introduces":[102],"Redundant-Noise":[104],"Mitigation":[105],"(RNMA)":[107],"unit":[108],"minimize":[110],"content":[111],"discrepancies":[112],"by":[113,139],"mitigating":[114],"redundant":[115],"Contrastive":[121],"Sample":[122],"Gradient":[123],"Modulation":[124],"(CSGM)":[125],"mechanism":[126],"adjust":[128],"gradient":[129,148],"magnitudes":[130],"balance":[132],"quality":[133],"discrepancies.":[134],"We":[135],"quantify":[136],"integrating":[140],"optimization":[141],"convergence":[143],"rate":[144],"more":[146],"precise":[147],"modulation":[149],"CSGM.":[151],"Experiments":[152],"demonstrate":[153],"DAAN":[154],"achieves":[155],"state-of-the-art":[156],"performance":[157],"benchmark":[159],"datasets,":[160],"with":[161],"ablation":[162],"validating":[164],"effectiveness":[166],"individual":[168],"modules.":[169],"Code":[170],"is":[171],"available":[172],"at":[173],"https://github.com/xiaoxinning/DAAN-GZSL.":[174]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
