{"id":"https://openalex.org/W7131134231","doi":"https://doi.org/10.1109/iccvw69036.2025.00013","title":"Zero-Shot Multimodal Compound Expression Recognition Approach Using Off-the-Shelf Large Visual-Language Models","display_name":"Zero-Shot Multimodal Compound Expression Recognition Approach Using Off-the-Shelf Large Visual-Language Models","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W7131134231","doi":"https://doi.org/10.1109/iccvw69036.2025.00013"},"language":null,"primary_location":{"id":"doi:10.1109/iccvw69036.2025.00013","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00013","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052465844","display_name":"Elena Ryumina","orcid":"https://orcid.org/0000-0002-4135-6949"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":true,"raw_author_name":"Elena Ryumina","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"],"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050029665","display_name":"Maxim Markitantov","orcid":"https://orcid.org/0000-0001-7987-1025"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Maxim Markitantov","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"],"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126651377","display_name":"Alexandr Axyonov","orcid":null},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Alexandr Axyonov","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"],"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008249311","display_name":"Dmitry Ryumin","orcid":"https://orcid.org/0000-0002-7935-0569"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Dmitry Ryumin","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"],"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053311653","display_name":"Mikhail Dolgushin","orcid":"https://orcid.org/0000-0002-4344-2330"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Mikhail Dolgushin","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"],"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032870944","display_name":"Alexey Karpov","orcid":"https://orcid.org/0000-0003-3424-652X"},"institutions":[{"id":"https://openalex.org/I173089394","display_name":"ITMO University","ror":"https://ror.org/04txgxn49","country_code":"RU","type":"education","lineage":["https://openalex.org/I173089394"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Alexey Karpov","raw_affiliation_strings":["ITMO University,St. Petersburg,Russia"],"affiliations":[{"raw_affiliation_string":"ITMO University,St. Petersburg,Russia","institution_ids":["https://openalex.org/I173089394"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5052465844"],"corresponding_institution_ids":["https://openalex.org/I1313323035","https://openalex.org/I4210144352"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.75110517,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"71","last_page":"79"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3140999972820282,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3140999972820282,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.04960000142455101,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.04360000044107437,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.583899974822998},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5717999935150146},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5551000237464905},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.5425999760627747},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.4887999892234802},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.4848000109195709},{"id":"https://openalex.org/keywords/facial-expression-recognition","display_name":"Facial expression recognition","score":0.4657000005245209},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.46480000019073486},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.44859999418258667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7203999757766724},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6717000007629395},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.583899974822998},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5717999935150146},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5551000237464905},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.5425999760627747},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.4887999892234802},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.4848000109195709},{"id":"https://openalex.org/C2987714656","wikidata":"https://www.wikidata.org/wiki/Q1185804","display_name":"Facial expression recognition","level":4,"score":0.4657000005245209},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.46480000019073486},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.44859999418258667},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.44130000472068787},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.38029998540878296},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.36550000309944153},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3562999963760376},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.35589998960494995},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.3416000008583069},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3212999999523163},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.303600013256073},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.2971000075340271},{"id":"https://openalex.org/C206310091","wikidata":"https://www.wikidata.org/wiki/Q750859","display_name":"Emotion classification","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.27709999680519104},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2578999996185303},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2524000108242035},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2515999972820282},{"id":"https://openalex.org/C121687571","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Activity recognition","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccvw69036.2025.00013","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00013","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2713788831","https://openalex.org/W2980495289","https://openalex.org/W3126750668","https://openalex.org/W3209397829","https://openalex.org/W3209984917","https://openalex.org/W4206487945","https://openalex.org/W4292794012","https://openalex.org/W4303437686","https://openalex.org/W4321353595","https://openalex.org/W4385815442","https://openalex.org/W4386066245","https://openalex.org/W4393159266","https://openalex.org/W4402916216","https://openalex.org/W4402916217","https://openalex.org/W4402916389","https://openalex.org/W4402917224","https://openalex.org/W4410773809","https://openalex.org/W4410773870","https://openalex.org/W4410775661","https://openalex.org/W4410966013","https://openalex.org/W4414197330","https://openalex.org/W4414198434"],"related_works":[],"abstract_inverted_index":{"Compound":[0,164],"Expression":[1],"Recognition":[2],"(CER),":[3],"a":[4,26,39,85],"subfield":[5],"of":[6,18,131],"affective":[7],"computing,":[8],"aims":[9],"to":[10,114,152],"detect":[11],"complex":[12],"emotional":[13],"states":[14],"formed":[15],"by":[16,97],"combinations":[17],"basic":[19],"emotions.":[20],"In":[21],"this":[22],"work,":[23],"we":[24],"present":[25],"novel":[27],"zero-shot":[28,67,149],"multimodal":[29],"approach":[30,65,126,161],"for":[31,78],"CER":[32],"that":[33,91,101],"combines":[34],"six":[35],"heterogeneous":[36],"modalities":[37],"into":[38],"single":[40],"pipeline:":[41],"static":[42],"and":[43,48,54,76,144],"dynamic":[44],"facial":[45],"expressions,":[46],"scene":[47,51,80],"label":[49,74],"matching,":[50],"context,":[52],"audio,":[53],"text.":[55],"Unlike":[56],"previous":[57],"approaches":[58,154],"relying":[59],"on":[60,133,136,146,156],"task-specific":[61],"training":[62],"data,":[63],"our":[64,160],"uses":[66,102],"components,":[68],"including":[69],"Contrastive":[70],"Language-Image":[71],"Pretraining":[72],"(CLIP)-based":[73],"matching":[75],"Qwen-VL":[77],"semantic":[79],"understanding.":[81],"We":[82],"further":[83],"introduce":[84],"Multi-Head":[86],"Probability":[87,104],"Fusion":[88],"(MHPF)":[89],"module":[90],"dynamically":[92],"weights":[93],"modality-specific":[94],"predictions,":[95],"followed":[96],"basic-to-compound":[98],"emotion":[99,118],"conversion":[100],"Pair-wise":[103,108],"Aggregation":[105,111],"(PPA)":[106],"or":[107],"Feature":[109],"Similarity":[110],"(PFSA)":[112],"methods":[113],"produce":[115],"interpretable":[116],"compound":[117],"outputs.":[119],"Evaluated":[120],"under":[121],"multi-corpus":[122],"training,":[123],"the":[124],"proposed":[125],"achieves":[127],"macro-F":[128],"1":[129],"scores":[130],"46.95%":[132],"AffWild2,":[134],"49.02%":[135],"Acted":[137],"Facial":[138],"Expressions":[139,165],"in":[140],"The":[141,170],"Wild":[142],"(AFEW),":[143],"34.85%":[145],"C-EXPR-DB":[147],"via":[148],"testing,":[150],"comparable":[151],"supervised":[153],"trained":[155],"target":[157],"data.":[158],"Thus,":[159],"effectively":[162],"captures":[163],"(CE)":[166],"without":[167],"domain":[168],"adaptation.":[169],"source":[171],"code":[172],"is":[173],"publicly":[174],"available":[175],"at":[176],"https://github.com/SMIL-SPCRAS/ICCVW_25.":[177]},"counts_by_year":[],"updated_date":"2026-02-25T06:17:34.324206","created_date":"2026-02-24T00:00:00"}
