{"id":"https://openalex.org/W7133303573","doi":"https://doi.org/10.48550/arxiv.2603.02123","title":"Nano-EmoX: Unifying Multimodal Emotional Intelligence from Perception to Empathy","display_name":"Nano-EmoX: Unifying Multimodal Emotional Intelligence from Perception to Empathy","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133303573","doi":"https://doi.org/10.48550/arxiv.2603.02123"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.02123","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02123","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.02123","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123000973","display_name":"Jiahao Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Jiahao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040228451","display_name":"Feng Lin","orcid":"https://orcid.org/0000-0003-2722-9588"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Fengyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127960816","display_name":"Xuechao Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xuechao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128004756","display_name":"Chen Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127960587","display_name":"Kexin Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Kexin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128019762","display_name":"Xu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5089127109","display_name":"Z. Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhide","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5123000973"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.6080999970436096,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.6080999970436096,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.21439999341964722,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.016499999910593033,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5752000212669373},{"id":"https://openalex.org/keywords/empathy","display_name":"Empathy","score":0.5309000015258789},{"id":"https://openalex.org/keywords/emotional-intelligence","display_name":"Emotional intelligence","score":0.5126000046730042},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.4602999985218048},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.44620001316070557},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.43369999527931213},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4131999909877777},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.3894999921321869}],"concepts":[{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5752000212669373},{"id":"https://openalex.org/C2779885105","wikidata":"https://www.wikidata.org/wiki/Q182263","display_name":"Empathy","level":2,"score":0.5309000015258789},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.5303000211715698},{"id":"https://openalex.org/C174107131","wikidata":"https://www.wikidata.org/wiki/Q191591","display_name":"Emotional intelligence","level":2,"score":0.5126000046730042},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46070000529289246},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.4602999985218048},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.45649999380111694},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.44620001316070557},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.43369999527931213},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4131999909877777},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.3894999921321869},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3790999948978424},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.36550000309944153},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C20253421","wikidata":"https://www.wikidata.org/wiki/Q477298","display_name":"Body language","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3052000105381012},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30329999327659607},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.2833000123500824},{"id":"https://openalex.org/C2776035688","wikidata":"https://www.wikidata.org/wiki/Q1606558","display_name":"Affect (linguistics)","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C105409693","wikidata":"https://www.wikidata.org/wiki/Q5937824","display_name":"Human intelligence","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.02123","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02123","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.02123","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02123","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6162550449371338,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,105,178],"development":[1],"of":[2,83,143],"affective":[3,24,42,59,99,125,157],"multimodal":[4,98],"language":[5,112,120],"models":[6],"(MLMs)":[7],"has":[8],"long":[9],"been":[10],"constrained":[11],"by":[12,62,133],"a":[13,35,53,68,75,81,92,110,118],"gap":[14],"between":[15],"low-level":[16],"perception":[17,136],"and":[18,26,50,72,91,101,176],"high-level":[19],"interaction,":[20],"leading":[21],"to":[22,45,95,122,153],"fragmented":[23],"capabilities":[25],"limited":[27],"generalization.":[28,177],"To":[29,140],"bridge":[30],"this":[31,63],"gap,":[32],"we":[33,65],"propose":[34],"cognitively":[36],"inspired":[37],"three-level":[38],"hierarchy":[39,162],"that":[40],"organizes":[41],"tasks":[43,158],"according":[44],"their":[46],"cognitive":[47],"depth-perception,":[48],"understanding,":[49],"interaction-and":[51],"provides":[52],"unified":[54,111],"conceptual":[55],"foundation":[56],"for":[57],"advancing":[58],"modeling.":[60],"Guided":[61],"hierarchy,":[64],"introduce":[66],"Nano-EmoX,":[67],"small-scale":[69],"multitask":[70],"MLM,":[71],"P2E":[73,128],"(Perception-to-Empathy),":[74],"curriculum-based":[76],"training":[77],"framework.":[78],"Nano-EmoX":[79,146],"integrates":[80],"suite":[82],"omni-modal":[84],"encoders,":[85],"including":[86],"an":[87],"enhanced":[88],"facial":[89],"encoder":[90],"fusion":[93],"encoder,":[94],"capture":[96],"key":[97],"cues":[100],"improve":[102],"cross-task":[103],"transferability.":[104],"outputs":[106],"are":[107],"projected":[108],"into":[109],"space":[113],"via":[114],"heterogeneous":[115],"adapters,":[116],"empowering":[117],"lightweight":[119],"model":[121],"tackle":[123],"diverse":[124],"tasks.":[126],"Concurrently,":[127],"progressively":[129],"cultivates":[130],"emotional":[131],"intelligence":[132],"aligning":[134],"rapid":[135],"with":[137],"chain-of-thought-driven":[138],"empathy.":[139],"the":[141,148],"best":[142],"our":[144],"knowledge,":[145],"is":[147,180],"first":[149],"compact":[150],"MLM":[151],"(2.2B)":[152],"unify":[154],"six":[155],"core":[156],"across":[159,170],"all":[160],"three":[161],"levels,":[163],"achieving":[164],"state-of-the-art":[165],"or":[166],"highly":[167],"competitive":[168],"performance":[169],"multiple":[171],"benchmarks,":[172],"demonstrating":[173],"excellent":[174],"efficiency":[175],"code":[179],"available":[181],"at":[182],"https://github.com/waHAHJIAHAO/Nano-EmoX.":[183]},"counts_by_year":[],"updated_date":"2026-04-15T05:59:14.812645","created_date":"2026-03-04T00:00:00"}
