{"id":"https://openalex.org/W7136871104","doi":"https://doi.org/10.48550/arxiv.2603.13056","title":"Team RAS in 10th ABAW Competition: Multimodal Valence and Arousal Estimation Approach","display_name":"Team RAS in 10th ABAW Competition: Multimodal Valence and Arousal Estimation Approach","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7136871104","doi":"https://doi.org/10.48550/arxiv.2603.13056"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13056","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13056","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13056","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052465844","display_name":"Elena Ryumina","orcid":"https://orcid.org/0000-0002-4135-6949"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Ryumina, Elena","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050029665","display_name":"Maxim Markitantov","orcid":"https://orcid.org/0000-0001-7987-1025"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Markitantov, Maxim","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036332146","display_name":"Alexandr Axyonov","orcid":"https://orcid.org/0000-0002-7479-2851"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Axyonov, Alexandr","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008249311","display_name":"Dmitry Ryumin","orcid":"https://orcid.org/0000-0002-7935-0569"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Ryumin, Dmitry","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053311653","display_name":"Mikhail Dolgushin","orcid":"https://orcid.org/0000-0002-4344-2330"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Dolgushin, Mikhail","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031192722","display_name":"Denis Dresvyanskiy","orcid":"https://orcid.org/0000-0002-5141-2561"},"institutions":[{"id":"https://openalex.org/I173089394","display_name":"ITMO University","ror":"https://ror.org/04txgxn49","country_code":"RU","type":"education","lineage":["https://openalex.org/I173089394"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Dresvyanskiy, Denis","raw_affiliation_strings":["ITMO University, St. Petersburg, Russia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ITMO University, St. Petersburg, Russia","institution_ids":["https://openalex.org/I173089394"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032870944","display_name":"Alexey Karpov","orcid":"https://orcid.org/0000-0003-3424-652X"},"institutions":[{"id":"https://openalex.org/I1313323035","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25","country_code":"RU","type":"government","lineage":["https://openalex.org/I1313323035"]},{"id":"https://openalex.org/I173089394","display_name":"ITMO University","ror":"https://ror.org/04txgxn49","country_code":"RU","type":"education","lineage":["https://openalex.org/I173089394"]},{"id":"https://openalex.org/I4210144352","display_name":"State Research Center of the Russian Federation","ror":"https://ror.org/04qpssw85","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210144352"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Karpov, Alexey","raw_affiliation_strings":["St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia","ITMO University, St. Petersburg, Russia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"St. Petersburg Federal Research Center of the Russian Academy of Sciences, St. Petersburg, Russia","institution_ids":["https://openalex.org/I4210144352","https://openalex.org/I1313323035"]},{"raw_affiliation_string":"ITMO University, St. Petersburg, Russia","institution_ids":["https://openalex.org/I173089394"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.0015999999595806003,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.0005000000237487257,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/valence","display_name":"Valence (chemistry)","score":0.6122000217437744},{"id":"https://openalex.org/keywords/arousal","display_name":"Arousal","score":0.5659000277519226},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5320000052452087},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5006999969482422},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.4595000147819519},{"id":"https://openalex.org/keywords/affective-computing","display_name":"Affective computing","score":0.4575999975204468},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.4397999942302704},{"id":"https://openalex.org/keywords/concordance-correlation-coefficient","display_name":"Concordance correlation coefficient","score":0.4311999976634979}],"concepts":[{"id":"https://openalex.org/C168900304","wikidata":"https://www.wikidata.org/wiki/Q171407","display_name":"Valence (chemistry)","level":2,"score":0.6122000217437744},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6110000014305115},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.609499990940094},{"id":"https://openalex.org/C36951298","wikidata":"https://www.wikidata.org/wiki/Q379784","display_name":"Arousal","level":2,"score":0.5659000277519226},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5320000052452087},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5006999969482422},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.4595000147819519},{"id":"https://openalex.org/C6438553","wikidata":"https://www.wikidata.org/wiki/Q1185804","display_name":"Affective computing","level":2,"score":0.4575999975204468},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4401000142097473},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.4397999942302704},{"id":"https://openalex.org/C2781059462","wikidata":"https://www.wikidata.org/wiki/Q5158906","display_name":"Concordance correlation coefficient","level":2,"score":0.4311999976634979},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.4074000120162964},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3919999897480011},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3578999936580658},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.33559998869895935},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.3165000081062317},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.3158999979496002},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2612999975681305},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2603999972343445}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13056","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13056","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13056","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13056","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Continuous":[0],"emotion":[1],"recognition":[2],"in":[3,21],"terms":[4],"of":[5,30,103,181],"valence":[6],"and":[7,27,50,60,93,130],"arousal":[8],"under":[9],"in-the-wild":[10],"(ITW)":[11],"conditions":[12],"remains":[13],"a":[14,35,95,116,131,176],"challenging":[15],"problem":[16],"due":[17],"to":[18,67,78,99],"large":[19],"variations":[20],"appearance,":[22],"head":[23],"pose,":[24],"illumination,":[25],"occlusions,":[26],"subject-specific":[28],"patterns":[29],"affective":[31],"expression.":[32],"We":[33,64],"present":[34],"multimodal":[36,172],"method":[37,43],"for":[38],"valence-arousal":[39],"estimation":[40],"ITW.":[41],"Our":[42],"combines":[44,137],"three":[45],"complementary":[46,147],"modalities:":[47],"face,":[48],"behavior,":[49],"audio.":[51],"The":[52,84,149],"face":[53],"modality":[54,86],"relies":[55,87],"on":[56,88,153,183],"GRADA-based":[57],"frame-level":[58,142],"embeddings":[59],"Transformer-based":[61],"temporal":[62,80],"regression.":[63],"use":[65],"Qwen3-VL-4B-Instruct":[66],"extract":[68],"behavior-relevant":[69],"information":[70],"from":[71],"video":[72],"segments,":[73],"while":[74,143],"Mamba":[75],"is":[76],"used":[77],"model":[79],"dynamics":[81],"across":[82],"segments.":[83,107],"audio":[85,145],"WavLM-Large":[89],"with":[90,127],"attention-statistics":[91],"pooling":[92],"includes":[94],"cross-modal":[96],"filtering":[97],"stage":[98],"reduce":[100],"the":[101,141,154,158,170,184],"influence":[102],"unreliable":[104],"or":[105],"non-speech":[106],"To":[108],"fuse":[109],"modalities,":[110],"we":[111],"explore":[112],"two":[113],"fusion":[114,173],"strategies:":[115],"Directed":[117],"Cross-Modal":[118],"Mixture-of-Experts":[119],"Fusion":[120,134],"Strategy":[121,135],"that":[122,136,169],"learns":[123],"interactions":[124],"between":[125],"modalities":[126],"adaptive":[128],"weighting,":[129],"Reliability-Aware":[132],"Audio-Visual":[133],"visual":[138],"features":[139],"at":[140],"using":[144],"as":[146],"context.":[148],"results":[150],"are":[151],"reported":[152],"Aff-Wild2":[155,185],"dataset":[156],"following":[157],"10th":[159],"Affective":[160],"Behavior":[161],"Analysis":[162],"in-the-Wild":[163],"(ABAW)":[164],"challenge":[165],"protocol.":[166],"Experiments":[167],"demonstrate":[168],"proposed":[171],"strategy":[174],"achieves":[175],"Concordance":[177],"Correlation":[178],"Coefficient":[179],"(CCC)":[180],"0.658":[182],"development":[186],"set.":[187]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-17T00:00:00"}
