{"id":"https://openalex.org/W4392903856","doi":"https://doi.org/10.1109/icassp48485.2024.10447640","title":"AttA-NET: Attention Aggregation Network for Audio-Visual Emotion Recognition","display_name":"AttA-NET: Attention Aggregation Network for Audio-Visual Emotion Recognition","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903856","doi":"https://doi.org/10.1109/icassp48485.2024.10447640"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447640","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447640","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102630119","display_name":"Ruijia Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruijia Fan","raw_affiliation_strings":["Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China"],"affiliations":[{"raw_affiliation_string":"Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100410326","display_name":"Hong Liu","orcid":"https://orcid.org/0000-0002-7498-6541"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Liu","raw_affiliation_strings":["Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China"],"affiliations":[{"raw_affiliation_string":"Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071411382","display_name":"Yidi Li","orcid":"https://orcid.org/0000-0002-5236-7010"},"institutions":[{"id":"https://openalex.org/I9086337","display_name":"Taiyuan University of Technology","ror":"https://ror.org/03kv08d37","country_code":"CN","type":"education","lineage":["https://openalex.org/I9086337"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yidi Li","raw_affiliation_strings":["Taiyuan University of Technology,College of Computer Science and Technology,China","National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China","College of Computer Science and Technology, Taiyuan University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Taiyuan University of Technology,College of Computer Science and Technology,China","institution_ids":["https://openalex.org/I9086337"]},{"raw_affiliation_string":"National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"College of Computer Science and Technology, Taiyuan University of Technology, China","institution_ids":["https://openalex.org/I9086337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085600464","display_name":"Peini Guo","orcid":"https://orcid.org/0000-0002-6654-1550"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peini Guo","raw_affiliation_strings":["Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China"],"affiliations":[{"raw_affiliation_string":"Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101750094","display_name":"Guoquan Wang","orcid":"https://orcid.org/0009-0001-2401-7435"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoquan Wang","raw_affiliation_strings":["Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China"],"affiliations":[{"raw_affiliation_string":"Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102800725","display_name":"Ti Wang","orcid":"https://orcid.org/0000-0002-1062-1252"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ti Wang","raw_affiliation_strings":["Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China"],"affiliations":[{"raw_affiliation_string":"Peking University,Shenzhen Graduate School,National Key Laboratory of General Artificial Intelligence,China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"National Key Laboratory of General Artificial Intelligence, Shenzhen Graduate School, Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102630119"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":4.8561,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.95700594,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"8030","last_page":"8034"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7829263210296631},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.745773196220398},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.6309360861778259},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6288230419158936},{"id":"https://openalex.org/keywords/atta","display_name":"Atta","score":0.6092240214347839},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5779101848602295},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5537760853767395},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5479294657707214},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4840394854545593},{"id":"https://openalex.org/keywords/net","display_name":"Net (polyhedron)","score":0.41386252641677856},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36026155948638916},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35876280069351196},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3394090533256531},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.09981048107147217}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7829263210296631},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.745773196220398},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.6309360861778259},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6288230419158936},{"id":"https://openalex.org/C2780975723","wikidata":"https://www.wikidata.org/wiki/Q1376113","display_name":"Atta","level":3,"score":0.6092240214347839},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5779101848602295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5537760853767395},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5479294657707214},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4840394854545593},{"id":"https://openalex.org/C14166107","wikidata":"https://www.wikidata.org/wiki/Q253829","display_name":"Net (polyhedron)","level":2,"score":0.41386252641677856},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36026155948638916},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35876280069351196},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3394090533256531},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.09981048107147217},{"id":"https://openalex.org/C2780653484","wikidata":"https://www.wikidata.org/wiki/Q22651","display_name":"Hymenoptera","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447640","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447640","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2069976350","https://openalex.org/W2187089797","https://openalex.org/W2964051877","https://openalex.org/W2985076077","https://openalex.org/W3005450949","https://openalex.org/W3016138882","https://openalex.org/W3034849760","https://openalex.org/W3035570025","https://openalex.org/W3081192838","https://openalex.org/W3093051361","https://openalex.org/W3112864462","https://openalex.org/W3169801598","https://openalex.org/W3172353912","https://openalex.org/W3175546442","https://openalex.org/W3213879871","https://openalex.org/W4205633160","https://openalex.org/W4293517975","https://openalex.org/W4312292725","https://openalex.org/W4312596733","https://openalex.org/W4313071174","https://openalex.org/W4313349512","https://openalex.org/W4377971642","https://openalex.org/W4385245566","https://openalex.org/W6787666870"],"related_works":["https://openalex.org/W2794449955","https://openalex.org/W2953263491","https://openalex.org/W1988139253","https://openalex.org/W1963958948","https://openalex.org/W2950464342","https://openalex.org/W4206205253","https://openalex.org/W4220792858","https://openalex.org/W2403134494","https://openalex.org/W2473445922","https://openalex.org/W2950145634"],"abstract_inverted_index":{"In":[0,51],"video-based":[1],"emotion":[2],"recognition,":[3],"effective":[4],"multi-modal":[5,119],"fusion":[6,22],"techniques":[7],"are":[8,24,110],"essential":[9],"to":[10,61,71,101],"leverage":[11],"the":[12,35,44,135],"complementary":[13],"relationship":[14],"between":[15],"audio":[16,39,89],"and":[17,40,43,82,90,118,131],"visual":[18,41,91],"modalities.":[19],"Recent":[20],"attention-based":[21],"methods":[23],"widely":[25],"leveraged":[26],"for":[27],"capturing":[28],"modal-shared":[29,73],"properties.":[30],"However,":[31],"they":[32],"often":[33],"ignore":[34],"modal-specific":[36,103,122],"properties":[37,74],"of":[38,46,116,137],"modalities":[42],"unalignment":[45],"model-shared":[47],"emotional":[48],"semantic":[49,92],"features.":[50,93],"this":[52],"paper,":[53],"an":[54,95],"Attention":[55],"Aggregation":[56],"Network":[57],"(AttA-NET)":[58],"is":[59,69,99,141],"proposed":[60,70],"address":[62],"these":[63],"challenges.":[64],"An":[65],"attention":[66],"aggregation":[67],"module":[68,77],"get":[72],"effectively.":[75],"This":[76],"comprises":[78],"similarity-aware":[79],"enhancement":[80],"blocks":[81],"a":[83],"contrastive":[84],"loss":[85],"that":[86],"facilitates":[87],"aligning":[88],"Moreover,":[94],"auxiliary":[96],"uni-modal":[97,117],"classifier":[98],"introduced":[100],"obtain":[102],"properties,":[104],"in":[105],"which":[106],"intra-modal":[107],"discriminative":[108],"features":[109],"fully":[111],"extracted.":[112],"Under":[113],"joint":[114],"optimization":[115],"classification":[120],"loss,":[121],"information":[123],"can":[124],"be":[125],"infused.":[126],"Extensive":[127],"experiments":[128],"on":[129],"RAVDESS":[130],"PKU-ER":[132],"datasets":[133],"validate":[134],"superiority":[136],"AttA-NET.":[138],"The":[139],"code":[140],"available":[142],"at:":[143],"https://github.com/NariFan2002/AttA-NET.":[144]},"counts_by_year":[{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
