{"id":"https://openalex.org/W7138230812","doi":"https://doi.org/10.48550/arxiv.2603.13406","title":"Nuanced Emotion Recognition Based on a Segment-based MLLM Framework Leveraging Qwen3-Omni for AH Detection","display_name":"Nuanced Emotion Recognition Based on a Segment-based MLLM Framework Leveraging Qwen3-Omni for AH Detection","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7138230812","doi":"https://doi.org/10.48550/arxiv.2603.13406"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13406","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13406","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13406","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129642829","display_name":"Liang Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tang, Liang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088333603","display_name":"Hongda Li","orcid":"https://orcid.org/0000-0002-6854-8369"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hongda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129668146","display_name":"Jiayu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jiayu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129713475","display_name":"Long Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Long","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129749802","display_name":"Shuxian Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Shuxian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129705978","display_name":"Siqi Pei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pei, Siqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129680479","display_name":"Tiaonan Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Tiaonan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129702907","display_name":"Yuhao Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Yuhao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5129642829"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.000699999975040555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.00039999998989515007,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6833999752998352},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5559999942779541},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.507099986076355},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.451200008392334},{"id":"https://openalex.org/keywords/affective-computing","display_name":"Affective computing","score":0.4052000045776367},{"id":"https://openalex.org/keywords/clips","display_name":"CLIPS","score":0.4011000096797943},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3635999858379364},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3450999855995178}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7484999895095825},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6833999752998352},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5559999942779541},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.507099986076355},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4788999855518341},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.451200008392334},{"id":"https://openalex.org/C6438553","wikidata":"https://www.wikidata.org/wiki/Q1185804","display_name":"Affective computing","level":2,"score":0.4052000045776367},{"id":"https://openalex.org/C2778739407","wikidata":"https://www.wikidata.org/wiki/Q165372","display_name":"CLIPS","level":2,"score":0.4011000096797943},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3991999924182892},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3953000009059906},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3635999858379364},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3395000100135803},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.32589998841285706},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.3248000144958496},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.31769999861717224},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.31690001487731934},{"id":"https://openalex.org/C2780665704","wikidata":"https://www.wikidata.org/wiki/Q959298","display_name":"Intervention (counseling)","level":2,"score":0.30799999833106995},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.25279998779296875},{"id":"https://openalex.org/C206310091","wikidata":"https://www.wikidata.org/wiki/Q750859","display_name":"Emotion classification","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13406","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13406","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13406","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13406","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6188831329345703,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Emotion":[0],"recognition":[1,61],"in":[2,8,80,160],"videos":[3,90],"is":[4,169],"a":[5,51,60,86,95],"pivotal":[6],"task":[7],"affective":[9],"computing,":[10],"where":[11],"identifying":[12],"subtle":[13],"psychological":[14],"states":[15,33],"such":[16,39],"as":[17,40],"Ambivalence":[18,30],"and":[19,27,31,47,77,113,127,150,163],"Hesitancy":[20,32],"holds":[21],"significant":[22],"value":[23],"for":[24,54],"behavioral":[25],"intervention":[26],"digital":[28],"health.":[29],"often":[34],"manifest":[35],"through":[36],"cross-modal":[37],"inconsistencies":[38],"discrepancies":[41],"between":[42],"facial":[43],"expressions,":[44],"vocal":[45],"tones,":[46],"textual":[48],"semantics,":[49],"posing":[50],"substantial":[52],"challenge":[53],"automated":[55],"recognition.":[56],"This":[57],"paper":[58],"proposes":[59],"framework":[62],"that":[63,133],"integrates":[64],"temporal":[65],"segment":[66],"modeling":[67],"with":[68,94],"Multimodal":[69,156],"Large":[70,157],"Language":[71,158],"Models.":[72],"To":[73],"address":[74],"computational":[75],"efficiency":[76],"token":[78],"constraints":[79],"long":[81],"video":[82],"processing,":[83],"we":[84],"employ":[85],"segment-based":[87],"strategy,":[88],"partitioning":[89],"into":[91],"short":[92],"clips":[93],"maximum":[96],"duration":[97],"of":[98,140,155],"5":[99],"seconds.":[100],"We":[101],"leverage":[102],"the":[103,108,117,121,134,143,152],"Qwen3-Omni-30B-A3B":[104],"model,":[105],"fine-tuned":[106],"on":[107,142],"BAH":[109],"dataset":[110],"using":[111],"LoRA":[112],"full-parameter":[114],"strategies":[115],"via":[116],"MS-Swift":[118],"framework,":[119],"enabling":[120],"model":[122],"to":[123],"synergistically":[124],"analyze":[125],"visual":[126],"auditory":[128],"signals.":[129],"Experimental":[130],"results":[131],"demonstrate":[132],"proposed":[135],"method":[136],"achieves":[137],"an":[138],"accuracy":[139],"85.1%":[141],"test":[144],"set,":[145],"significantly":[146],"outperforming":[147],"existing":[148],"benchmarks":[149],"validating":[151],"superior":[153],"capability":[154],"Models":[159],"capturing":[161],"complex":[162],"nuanced":[164],"emotional":[165],"conflicts.":[166],"The":[167],"code":[168],"released":[170],"at":[171],"https://github.com/dlnn123/A-H-Detection-with-Qwen-Omni.git.":[172]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
