{"id":"https://openalex.org/W4400975545","doi":"https://doi.org/10.1109/taffc.2024.3433386","title":"Versatile Audio-Visual Learning for Emotion Recognition","display_name":"Versatile Audio-Visual Learning for Emotion Recognition","publication_year":2024,"publication_date":"2024-07-25","ids":{"openalex":"https://openalex.org/W4400975545","doi":"https://doi.org/10.1109/taffc.2024.3433386"},"language":"en","primary_location":{"id":"doi:10.1109/taffc.2024.3433386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taffc.2024.3433386","pdf_url":null,"source":{"id":"https://openalex.org/S104780363","display_name":"IEEE Transactions on Affective Computing","issn_l":"1949-3045","issn":["1949-3045","2371-9850"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Affective Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017966455","display_name":"Lucas Goncalves","orcid":"https://orcid.org/0000-0001-9613-1002"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lucas Goncalves","raw_affiliation_strings":["Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0000-0001-9613-1002","affiliations":[{"raw_affiliation_string":"Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024273749","display_name":"Seong-Gyun Leem","orcid":"https://orcid.org/0000-0002-1175-1577"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Seong-Gyun Leem","raw_affiliation_strings":["Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0000-0002-1175-1577","affiliations":[{"raw_affiliation_string":"Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070819601","display_name":"Wei-Cheng Lin","orcid":"https://orcid.org/0000-0003-1933-1590"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei-Cheng Lin","raw_affiliation_strings":["Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0000-0003-1933-1590","affiliations":[{"raw_affiliation_string":"Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001303929","display_name":"Berrak \u015ei\u015fman","orcid":"https://orcid.org/0000-0001-8078-3305"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Berrak Sisman","raw_affiliation_strings":["Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0000-0001-8078-3305","affiliations":[{"raw_affiliation_string":"Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040793194","display_name":"Carlos Busso","orcid":"https://orcid.org/0000-0002-4075-4072"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carlos Busso","raw_affiliation_strings":["Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0000-0002-4075-4072","affiliations":[{"raw_affiliation_string":"Erik Jonsson School of Engineering &#x0026; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"Erik Jonsson School of Engineering &amp; Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5017966455"],"corresponding_institution_ids":["https://openalex.org/I162577319"],"apc_list":null,"apc_paid":null,"fwci":10.0835,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.985675,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"16","issue":"1","first_page":"306","last_page":"318"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9422000050544739,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7795387506484985},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.6479311585426331},{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.5741525292396545},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5231080055236816},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5122074484825134},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5065010786056519},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.47514867782592773},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4690715968608856},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4579618573188782},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.43398264050483704},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4267883896827698},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.41890281438827515},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.18569698929786682}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7795387506484985},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.6479311585426331},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.5741525292396545},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5231080055236816},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5122074484825134},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5065010786056519},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.47514867782592773},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4690715968608856},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4579618573188782},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.43398264050483704},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4267883896827698},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.41890281438827515},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.18569698929786682},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taffc.2024.3433386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taffc.2024.3433386","pdf_url":null,"source":{"id":"https://openalex.org/S104780363","display_name":"IEEE Transactions on Affective Computing","issn_l":"1949-3045","issn":["1949-3045","2371-9850"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Affective Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W1981918162","https://openalex.org/W1985867508","https://openalex.org/W2003238582","https://openalex.org/W2030931454","https://openalex.org/W2074788634","https://openalex.org/W2106390385","https://openalex.org/W2143350951","https://openalex.org/W2146334809","https://openalex.org/W2167557160","https://openalex.org/W2341528187","https://openalex.org/W2342475039","https://openalex.org/W2396728763","https://openalex.org/W2526907895","https://openalex.org/W2619383789","https://openalex.org/W2619697695","https://openalex.org/W2673304402","https://openalex.org/W2742542661","https://openalex.org/W2745497104","https://openalex.org/W2803098682","https://openalex.org/W2889090126","https://openalex.org/W2894589512","https://openalex.org/W2894944581","https://openalex.org/W2963260436","https://openalex.org/W2963710346","https://openalex.org/W2964051877","https://openalex.org/W2964109005","https://openalex.org/W2979826702","https://openalex.org/W2995479270","https://openalex.org/W3002732654","https://openalex.org/W3015371781","https://openalex.org/W3016092640","https://openalex.org/W3016138882","https://openalex.org/W3097777922","https://openalex.org/W3108792608","https://openalex.org/W3114214226","https://openalex.org/W3137028092","https://openalex.org/W3154807520","https://openalex.org/W3161541317","https://openalex.org/W3198771897","https://openalex.org/W3209058072","https://openalex.org/W4224932526","https://openalex.org/W4225959162","https://openalex.org/W4283698005","https://openalex.org/W4285250231","https://openalex.org/W4297841781","https://openalex.org/W4312292725","https://openalex.org/W4312384316","https://openalex.org/W4312420631","https://openalex.org/W4312976151","https://openalex.org/W4327852215","https://openalex.org/W4361994820","https://openalex.org/W4372346152","https://openalex.org/W4375869346","https://openalex.org/W4385245566","https://openalex.org/W4385804798","https://openalex.org/W4385805042","https://openalex.org/W4387421359","https://openalex.org/W4390874791","https://openalex.org/W4394606408","https://openalex.org/W6631190155","https://openalex.org/W6686207219","https://openalex.org/W6689779973","https://openalex.org/W6780218876","https://openalex.org/W6790830454","https://openalex.org/W6793736971","https://openalex.org/W6810007534","https://openalex.org/W6838154772","https://openalex.org/W6851525677"],"related_works":["https://openalex.org/W4386799044","https://openalex.org/W2271369634","https://openalex.org/W2047100085","https://openalex.org/W4381827277","https://openalex.org/W3157841754","https://openalex.org/W4390136517","https://openalex.org/W3167558523","https://openalex.org/W3120825179","https://openalex.org/W2999894541","https://openalex.org/W2014028898"],"abstract_inverted_index":{"Most":[0],"current":[1],"audio-visual":[2,93,113,150],"emotion":[3,49,104,107],"recognition":[4,50],"models":[5],"lack":[6],"the":[7,56,132,175,189,195],"flexibility":[8,45],"needed":[9],"for":[10,34,97,103,129],"deployment":[11],"in":[12,46,59,188],"practical":[13],"applications.":[14],"We":[15,110,143],"envision":[16],"a":[17,47,70,91,159,184],"multimodal":[18,48,101],"system":[19,51],"that":[20,115,167],"works":[21],"even":[22,119],"when":[23,120],"only":[24,137,139],"one":[25],"modality":[26],"is":[27,52,68,126,141],"available":[28,128],"and":[29,62,100,122,158,178],"can":[30,116],"be":[31,117],"implemented":[32],"interchangeably":[33],"either":[35],"predicting":[36],"emotional":[37,190],"attributes":[38],"or":[39,76,85,106,138],"recognizing":[40],"categorical":[41],"emotions.":[42],"Achieving":[43],"such":[44],"difficult":[53],"due":[54],"to":[55,72],"inherent":[57],"challenges":[58],"accurately":[60],"interpreting":[61],"integrating":[63],"varied":[64],"data":[65,125],"sources.":[66],"It":[67],"also":[69],"challenge":[71],"robustly":[73],"handle":[74],"missing":[75],"partial":[77],"information":[78],"while":[79],"allowing":[80],"direct":[81],"switch":[82],"between":[83],"regression":[84,105],"classification":[86,108],"tasks.":[87,109],"This":[88],"study":[89],"proposes":[90],"versatile":[92],"learning":[94,148],"(VAVL)":[95],"framework":[96,114],"handling":[98],"unimodal":[99,160],"systems":[102],"implement":[111],"an":[112],"trained":[118],"audio":[121,136],"visual":[123],"paired":[124],"not":[127],"part":[130],"of":[131],"training":[133],"set":[134],"(i.e.,":[135],"video":[140],"present).":[142],"achieve":[144],"this":[145],"effective":[146],"representation":[147],"with":[149],"shared":[151,156],"layers,":[152,157],"residual":[153],"connections":[154],"over":[155],"reconstruction":[161],"task.":[162],"Our":[163],"experimental":[164],"results":[165],"reveal":[166],"our":[168],"architecture":[169],"significantly":[170],"outperforms":[171],"strong":[172],"baselines":[173],"on":[174,194],"CREMA-D,":[176],"MSP-IMPROV,":[177],"CMU-MOSEI":[179],"corpora.":[180],"Notably,":[181],"VAVL":[182],"attains":[183],"new":[185],"state-of-the-art":[186],"performance":[187],"attribute":[191],"prediction":[192],"task":[193],"MSP-IMPROV":[196],"corpus.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
