{"id":"https://openalex.org/W7127923648","doi":"https://doi.org/10.48550/arxiv.2602.04260","title":"Decoupled Hierarchical Distillation for Multimodal Emotion Recognition","display_name":"Decoupled Hierarchical Distillation for Multimodal Emotion Recognition","publication_year":2026,"publication_date":"2026-02-04","ids":{"openalex":"https://openalex.org/W7127923648","doi":"https://doi.org/10.48550/arxiv.2602.04260"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.04260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.04260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.04260","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125099861","display_name":"Yong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Yong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122894213","display_name":"Yuanzhi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuanzhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125186586","display_name":"Yi Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100452825","display_name":"Shu Zhang","orcid":"https://orcid.org/0000-0001-5011-8566"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shiqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125243014","display_name":"Ke Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Ke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120979127","display_name":"Cuntai Guan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guan, Cuntai","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5125099861"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.002099999925121665,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.0008999999845400453,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6816999912261963},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.6262000203132629},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5358999967575073},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.49410000443458557},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.48089998960494995},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4733999967575073},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4693000018596649}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7299000024795532},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6816999912261963},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6783000230789185},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.6262000203132629},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5358999967575073},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.49410000443458557},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4855000078678131},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.48089998960494995},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4733999967575073},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4693000018596649},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.43059998750686646},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3785000145435333},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C121687571","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Activity recognition","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.04260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.04260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.04260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.04260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7828307747840881,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Human":[0],"multimodal":[1,32],"emotion":[2],"recognition":[3],"(MER)":[4],"seeks":[5],"to":[6,118],"infer":[7],"human":[8],"emotions":[9],"by":[10],"integrating":[11],"information":[12],"from":[13,37],"language,":[14],"visual,":[15],"and":[16,34,62,102,132,153,171],"acoustic":[17],"modalities.":[18,39],"Although":[19],"existing":[20],"MER":[21,122,146],"approaches":[22],"have":[23],"achieved":[24],"promising":[25],"results,":[26],"they":[27],"still":[28],"struggle":[29],"with":[30],"inherent":[31],"heterogeneities":[33],"varying":[35],"contributions":[36],"different":[38],"To":[40],"address":[41],"these":[42],"challenges,":[43],"we":[44],"propose":[45],"a":[46,67,73,83,94,107],"novel":[47],"framework,":[48],"Decoupled":[49],"Hierarchical":[50],"Multimodal":[51],"Distillation":[52,85],"(DHMD).":[53],"DHMD":[54,142,175],"decouples":[55],"each":[56,89],"modality's":[57],"features":[58],"into":[59],"modality-irrelevant":[60],"(homogeneous)":[61],"modality-exclusive":[63],"(heterogeneous)":[64],"components":[65],"using":[66],"self-regression":[68],"mechanism.":[69],"The":[70],"framework":[71],"employs":[72],"two-stage":[74],"knowledge":[75,130],"distillation":[76,99,126],"(KD)":[77],"strategy:":[78],"(1)":[79],"coarse-grained":[80],"KD":[81,105],"via":[82],"Graph":[84],"Unit":[86],"(GD-Unit)":[87],"in":[88,174],"decoupled":[90],"feature":[91,136,182],"space,":[92],"where":[93],"dynamic":[95],"graph":[96,169],"facilitates":[97],"adaptive":[98],"among":[100],"modalities,":[101],"(2)":[103],"fine-grained":[104],"through":[106],"cross-modal":[108,135],"dictionary":[109,172],"matching":[110],"mechanism,":[111],"which":[112],"aligns":[113],"semantic":[114],"granularities":[115],"across":[116,180],"modalities":[117],"produce":[119],"more":[120],"discriminative":[121],"representations.":[123],"This":[124],"hierarchical":[125],"approach":[127],"enables":[128],"flexible":[129],"transfer":[131],"effectively":[133],"improves":[134],"alignment.":[137],"Experimental":[138],"results":[139,164],"demonstrate":[140],"that":[141,166],"consistently":[143],"outperforms":[144],"state-of-the-art":[145],"methods,":[147],"achieving":[148],"1.3\\%/2.4\\%":[149],"(ACC$_7$),":[150],"1.3\\%/1.9\\%":[151],"(ACC$_2$)":[152],"1.9\\%/1.8\\%":[154],"(F1)":[155],"relative":[156],"improvement":[157],"on":[158],"CMU-MOSI/CMU-MOSEI":[159],"dataset,":[160],"respectively.":[161],"Meanwhile,":[162],"visualization":[163],"reveal":[165],"both":[167],"the":[168],"edges":[170],"activations":[173],"exhibit":[176],"meaningful":[177],"distribution":[178],"patterns":[179],"modality-irrelevant/-exclusive":[181],"spaces.":[183]},"counts_by_year":[],"updated_date":"2026-02-07T06:15:42.627816","created_date":"2026-02-07T00:00:00"}
