{"id":"https://openalex.org/W4416251630","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227260","title":"VLLMs Provide Better Context for Emotion Understanding Through Common Sense Reasoning","display_name":"VLLMs Provide Better Context for Emotion Understanding Through Common Sense Reasoning","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416251630","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227260"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11227260","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227260","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082235742","display_name":"Alexandros Xenos","orcid":"https://orcid.org/0000-0001-6242-8916"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Alexandros Xenos","raw_affiliation_strings":["Queen Mary University of London,London,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,London,UK","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080786995","display_name":"Niki Maria Foteinopoulou","orcid":"https://orcid.org/0000-0003-4481-9360"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Niki M. Foteinopoulou","raw_affiliation_strings":["Queen Mary University of London,London,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,London,UK","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041928666","display_name":"\u0399\u03c9\u03ac\u03bd\u03bd\u03b1 \u039d\u03c4\u03af\u03bd\u03bf\u03c5","orcid":"https://orcid.org/0000-0001-6634-455X"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ioanna Ntinou","raw_affiliation_strings":["Queen Mary University of London,London,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,London,UK","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031205865","display_name":"Ioannis Patras","orcid":"https://orcid.org/0000-0003-3913-4738"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ioannis Patras","raw_affiliation_strings":["Queen Mary University of London,London,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,London,UK","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024224610","display_name":"Georgios Tzimiropoulos","orcid":"https://orcid.org/0000-0002-1803-5338"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Georgios Tzimiropoulos","raw_affiliation_strings":["Queen Mary University of London,London,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,London,UK","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.4863,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.94839891,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.6205999851226807,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.6205999851226807,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.21130000054836273,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.09510000050067902,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6245999932289124},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.508400022983551},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5045999884605408},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4966000020503998},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4927999973297119},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.4708000123500824},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.4456000030040741},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.43369999527931213},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.41850000619888306}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.728600025177002},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6245999932289124},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5733000040054321},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.508400022983551},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5045999884605408},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4966000020503998},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4927999973297119},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48089998960494995},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.4708000123500824},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.4456000030040741},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.43369999527931213},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.41850000619888306},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.41040000319480896},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4083000123500824},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.3815000057220459},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.36980000138282776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3637999892234802},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.36010000109672546},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3499000072479248},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.3296000063419342},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.31779998540878296},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3127000033855438},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3057999908924103},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.29899999499320984},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.29339998960494995},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2863999903202057},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2644999921321869},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.25929999351501465}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11227260","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227260","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W2056621158","https://openalex.org/W2103153725","https://openalex.org/W2149095485","https://openalex.org/W2164186291","https://openalex.org/W2740693122","https://openalex.org/W2804436848","https://openalex.org/W2888816955","https://openalex.org/W2964751875","https://openalex.org/W3001529617","https://openalex.org/W3034520808","https://openalex.org/W3035497134","https://openalex.org/W3100307207","https://openalex.org/W3118310928","https://openalex.org/W3127166971","https://openalex.org/W3135844897","https://openalex.org/W3139068353","https://openalex.org/W3162758083","https://openalex.org/W3175546442","https://openalex.org/W3176899168","https://openalex.org/W3184134865","https://openalex.org/W4210412034","https://openalex.org/W4221108109","https://openalex.org/W4285529412","https://openalex.org/W4304092664","https://openalex.org/W4304098327","https://openalex.org/W4312609734","https://openalex.org/W4385804947","https://openalex.org/W4386072325","https://openalex.org/W4386075843","https://openalex.org/W4400527853","https://openalex.org/W4402713102","https://openalex.org/W4402727764","https://openalex.org/W4402753737","https://openalex.org/W4402979749","https://openalex.org/W4404782618","https://openalex.org/W4404788931","https://openalex.org/W4412602922","https://openalex.org/W4415795657"],"related_works":[],"abstract_inverted_index":{"Recognising":[0],"emotions":[1,9],"in":[2,73,102],"context":[3],"involves":[4],"identifying":[5],"an":[6],"individual\u2019s":[7],"apparent":[8,100],"while":[10],"considering":[11],"contextual":[12,43],"cues":[13],"from":[14,54],"the":[15,62,98,105,109,113,130,139,151,156,160],"surrounding":[16],"scene.":[17],"Previous":[18],"approaches":[19],"to":[20,51,68,92,104,118,158,168],"this":[21,58],"task":[22],"have":[23],"typically":[24],"designed":[25],"explicit":[26],"scene-encoding":[27],"architectures":[28],"or":[29,45],"incorporated":[30],"external":[31],"scene-related":[32],"information,":[33],"such":[34],"as":[35],"captions.":[36],"However,":[37],"these":[38],"methods":[39],"often":[40],"utilise":[41],"limited":[42],"information":[44],"rely":[46],"on":[47],"intricate":[48],"training":[49,140],"pipelines":[50],"decouple":[52],"noise":[53],"relevant":[55],"information.":[56],"In":[57],"work,":[59],"we":[60,89],"leverage":[61],"capabilities":[63],"of":[64,97],"Vision-and-Large-Language":[65],"Models":[66],"(VLLMs)":[67],"enhance":[69],"in-context":[70],"emotion":[71,101],"classification":[72,132],"a":[74,82,120],"more":[75],"straightforward":[76],"manner.":[77],"Our":[78,172,188],"proposed":[79],"method":[80,135],"follows":[81],"simple":[83],"yet":[84],"effective":[85],"two-stage":[86],"approach.":[87],"First,":[88],"prompt":[90],"VLLMs":[91],"generate":[93],"natural":[94],"language":[95],"descriptions":[96,153],"subject\u2019s":[99],"relation":[103],"visual":[106,114,127,162],"context.":[107],"Second,":[108],"descriptions,":[110],"along":[111],"with":[112],"input,":[115,163],"are":[116],"used":[117],"train":[119],"transformer-based":[121],"architecture":[122,167],"that":[123,150],"fuses":[124],"text":[125],"and":[126,182,186],"features":[128],"before":[129],"final":[131],"task.":[133],"This":[134],"not":[136],"only":[137],"simplifies":[138],"process":[141],"but":[142],"also":[143],"significantly":[144],"improves":[145],"performance.":[146],"Experimental":[147],"results":[148],"demonstrate":[149],"textual":[152],"effectively":[154],"guide":[155],"model":[157],"constrain":[159],"noisy":[161],"allowing":[164],"our":[165],"fused":[166],"outperform":[169],"individual":[170],"modalities.":[171],"approach":[173],"achieves":[174],"state-of-the-art":[175],"performance":[176],"across":[177],"three":[178],"datasets,":[179],"BoLD,":[180],"EMOTIC,":[181],"CAER-S,":[183],"without":[184],"bells":[185],"whistles.":[187],"code":[189],"will":[190],"be":[191],"made":[192],"publicly":[193],"available.":[194]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-14T00:00:00"}
