{"id":"https://openalex.org/W4412444993","doi":"https://doi.org/10.1109/tcsvt.2025.3588892","title":"Visual and Textual Prompts in VLLMs for Enhancing Emotion Recognition","display_name":"Visual and Textual Prompts in VLLMs for Enhancing Emotion Recognition","publication_year":2025,"publication_date":"2025-07-15","ids":{"openalex":"https://openalex.org/W4412444993","doi":"https://doi.org/10.1109/tcsvt.2025.3588892"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3588892","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3588892","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zhifeng Wang","orcid":"https://orcid.org/0000-0003-0392-5757"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Zhifeng Wang","raw_affiliation_strings":["School of Computing, Australian National University (ANU), Canberra, ACT, Australia","School of Computing, Australian National University (ANU) Canberra, ACT, Australia"],"raw_orcid":"https://orcid.org/0000-0003-0392-5757","affiliations":[{"raw_affiliation_string":"School of Computing, Australian National University (ANU), Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"School of Computing, Australian National University (ANU) Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038142929","display_name":"Qixuan Zhang","orcid":"https://orcid.org/0000-0002-4837-7152"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Qixuan Zhang","raw_affiliation_strings":["School of Computing, Australian National University (ANU), Canberra, ACT, Australia","School of Computing, Australian National University (ANU) Canberra, ACT, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Australian National University (ANU), Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"School of Computing, Australian National University (ANU) Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101435916","display_name":"Peter Zhang","orcid":"https://orcid.org/0000-0002-0422-834X"},"institutions":[{"id":"https://openalex.org/I2799746221","display_name":"University of Divinity","ror":"https://ror.org/02xn8bh65","country_code":"AU","type":"education","lineage":["https://openalex.org/I2799746221"]},{"id":"https://openalex.org/I4210098511","display_name":"Office of the Chief Scientist","ror":"https://ror.org/00f21s284","country_code":"IL","type":"government","lineage":["https://openalex.org/I4210098511","https://openalex.org/I4210113586"]}],"countries":["AU","IL"],"is_corresponding":false,"raw_author_name":"Peter Zhang","raw_affiliation_strings":["Quriosity Pty Ltd., Melbourne, VIC, Australia","senior AI scientist at the Quriosity Pty Ltd, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Quriosity Pty Ltd., Melbourne, VIC, Australia","institution_ids":["https://openalex.org/I2799746221"]},{"raw_affiliation_string":"senior AI scientist at the Quriosity Pty Ltd, Australia","institution_ids":["https://openalex.org/I4210098511"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037200751","display_name":"Wenjia Niu","orcid":"https://orcid.org/0000-0003-1506-830X"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wenjia Niu","raw_affiliation_strings":["School of Computing, Australian National University (ANU), Canberra, ACT, Australia","School of Computing, Australian National University (ANU) Canberra, ACT, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Australian National University (ANU), Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"School of Computing, Australian National University (ANU) Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103144603","display_name":"Kaihao Zhang","orcid":"https://orcid.org/0000-0002-4317-660X"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Kaihao Zhang","raw_affiliation_strings":["School of Computing, Australian National University (ANU), Canberra, ACT, Australia","School of Computing, Australian National University (ANU) Canberra, ACT, Australia"],"raw_orcid":"https://orcid.org/0000-0002-4317-660X","affiliations":[{"raw_affiliation_string":"School of Computing, Australian National University (ANU), Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"School of Computing, Australian National University (ANU) Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073093333","display_name":"Ramesh Sankaranarayana","orcid":"https://orcid.org/0000-0002-7001-5369"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ramesh Sankaranarayana","raw_affiliation_strings":["School of Computing, Australian National University (ANU), Canberra, ACT, Australia","School of Computing, Australian National University (ANU) Canberra, ACT, Australia"],"raw_orcid":"https://orcid.org/0000-0002-7001-5369","affiliations":[{"raw_affiliation_string":"School of Computing, Australian National University (ANU), Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"School of Computing, Australian National University (ANU) Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032320901","display_name":"Sabrina Caldwell","orcid":"https://orcid.org/0000-0003-0605-3149"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Sabrina Caldwell","raw_affiliation_strings":["School of Computing, Australian National University (ANU), Canberra, ACT, Australia","School of Computing, Australian National University (ANU) Canberra, ACT, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Australian National University (ANU), Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"School of Computing, Australian National University (ANU) Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030379402","display_name":"Tom Gedeon","orcid":"https://orcid.org/0000-0001-8356-4909"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]},{"id":"https://openalex.org/I205640436","display_name":"Curtin University","ror":"https://ror.org/02n415q13","country_code":"AU","type":"education","lineage":["https://openalex.org/I205640436"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Tom Gedeon","raw_affiliation_strings":["Human-Centric Advancements Chair in AI, Curtin University, Perth, WA, Australia","Human-Centric Advancements Chair in AI, Curtin University and Australian National University, Australia"],"raw_orcid":"https://orcid.org/0000-0001-8356-4909","affiliations":[{"raw_affiliation_string":"Human-Centric Advancements Chair in AI, Curtin University, Perth, WA, Australia","institution_ids":["https://openalex.org/I205640436"]},{"raw_affiliation_string":"Human-Centric Advancements Chair in AI, Curtin University and Australian National University, Australia","institution_ids":["https://openalex.org/I118347636"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I118347636"],"apc_list":null,"apc_paid":null,"fwci":7.3399,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.97056502,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"35","issue":"12","first_page":"12355","last_page":"12368"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.3756999969482422,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.3756999969482422,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.32269999384880066,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6443343758583069},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.4708998501300812},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4627760350704193},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45706406235694885},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4209921956062317},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3906467854976654},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3859897255897522},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35502946376800537},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.3277544677257538},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.29012376070022583}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6443343758583069},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.4708998501300812},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4627760350704193},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45706406235694885},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4209921956062317},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3906467854976654},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3859897255897522},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35502946376800537},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3277544677257538},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.29012376070022583}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3588892","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3588892","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2600389231","https://openalex.org/W2796314858","https://openalex.org/W2905153662","https://openalex.org/W3034552680","https://openalex.org/W3093370878","https://openalex.org/W3096719817","https://openalex.org/W3138808092","https://openalex.org/W3142113686","https://openalex.org/W3194523157","https://openalex.org/W4285192809","https://openalex.org/W4293330951","https://openalex.org/W4309368547","https://openalex.org/W4360993849","https://openalex.org/W4386072325","https://openalex.org/W4386075510","https://openalex.org/W4386075843","https://openalex.org/W4390873481","https://openalex.org/W4391097412","https://openalex.org/W4391893186","https://openalex.org/W4393154594","https://openalex.org/W4395474918","https://openalex.org/W4395960521","https://openalex.org/W4399039312","https://openalex.org/W4399557580","https://openalex.org/W4399800460","https://openalex.org/W4400911572","https://openalex.org/W4402557613","https://openalex.org/W4402703108","https://openalex.org/W4402727885","https://openalex.org/W4402754067","https://openalex.org/W4402916637","https://openalex.org/W4403063812","https://openalex.org/W4404035171","https://openalex.org/W4404782618","https://openalex.org/W4404784276","https://openalex.org/W4404901703","https://openalex.org/W4405598479","https://openalex.org/W4409129027","https://openalex.org/W4415798317"],"related_works":["https://openalex.org/W2068608913","https://openalex.org/W3124914020","https://openalex.org/W2141033859","https://openalex.org/W2156434174","https://openalex.org/W2071701083","https://openalex.org/W2383687187","https://openalex.org/W2081517010","https://openalex.org/W2121496884","https://openalex.org/W3126677997","https://openalex.org/W1610857240"],"abstract_inverted_index":{"Vision":[0],"Large":[1],"Language":[2],"Models":[3],"(VLLMs)":[4],"exhibit":[5],"promising":[6],"potential":[7],"for":[8],"multi-modal":[9],"understanding,":[10],"yet":[11],"their":[12],"application":[13],"to":[14,48],"video-based":[15],"emotion":[16,69,135],"recognition":[17,70,136],"remains":[18],"limited":[19],"by":[20,71],"insufficient":[21],"spatial":[22,73],"and":[23,44,85,112],"contextual":[24,86],"awareness.":[25],"Traditional":[26],"approaches,":[27],"which":[28],"prioritize":[29],"isolated":[30],"facial":[31,78,109],"features,":[32],"often":[33],"neglect":[34],"critical":[35],"non-verbal":[36],"cues":[37,87],"such":[38],"as":[39],"body":[40],"language,":[41],"environmental":[42],"context,":[43],"social":[45],"interactions,":[46],"leading":[47],"reduced":[49],"robustness":[50],"in":[51,131],"real-world":[52],"scenarios.":[53],"To":[54],"address":[55],"this":[56],"gap,":[57],"we":[58],"propose":[59],"Set-of-Vision-Text":[60],"Prompting":[61],"(SoVTP),":[62],"a":[63,95],"novel":[64],"framework":[65],"that":[66,118],"enhances":[67],"zero-shot":[68],"integrating":[72],"annotations":[74],"(e.g.,":[75],"bounding":[76],"boxes,":[77],"landmarks),":[79],"physiological":[80],"signals":[81],"(facial":[82],"action":[83],"units),":[84],"(body":[88],"posture,":[89],"scene":[90,102],"dynamics,":[91],"others\u2019":[92],"emotions)":[93],"into":[94],"unified":[96],"prompting":[97,126],"strategy.":[98],"SoVTP":[99,119],"preserves":[100],"holistic":[101],"information":[103],"while":[104],"enabling":[105],"fine-grained":[106],"analysis":[107],"of":[108],"muscle":[110],"movements":[111],"interpersonal":[113],"dynamics.":[114],"Extensive":[115],"experiments":[116],"show":[117],"achieves":[120],"substantial":[121],"improvements":[122],"over":[123],"existing":[124],"visual":[125],"methods,":[127],"demonstrating":[128],"its":[129],"effectiveness":[130],"enhancing":[132],"VLLMs\u2019":[133],"video":[134],"capabilities.":[137]},"counts_by_year":[{"year":2026,"cited_by_count":4}],"updated_date":"2026-04-26T08:31:28.666265","created_date":"2025-10-10T00:00:00"}
