{"id":"https://openalex.org/W7154129181","doi":"https://doi.org/10.48550/arxiv.2604.08990","title":"ActFER: Agentic Facial Expression Recognition via Active Tool-Augmented Visual Reasoning","display_name":"ActFER: Agentic Facial Expression Recognition via Active Tool-Augmented Visual Reasoning","publication_year":2026,"publication_date":"2026-04-10","ids":{"openalex":"https://openalex.org/W7154129181","doi":"https://doi.org/10.48550/arxiv.2604.08990"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08990","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08990","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08990","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133486885","display_name":"Shifeng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Shifeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054504343","display_name":"Zhengye Zhang","orcid":"https://orcid.org/0009-0008-1558-827X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhengye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133495296","display_name":"Sirui Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Sirui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074851500","display_name":"Xinglong Mao","orcid":"https://orcid.org/0000-0003-0019-2295"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Xinglong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052517470","display_name":"Zhehan Kan","orcid":"https://orcid.org/0000-0002-1069-649X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kan, Zhehan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133492972","display_name":"Zhixiang Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Zhixiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133545873","display_name":"Shiwei Wu","orcid":"https://orcid.org/0000-0001-9838-9066"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Shiwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014172220","display_name":"Chaoyou Fu","orcid":"https://orcid.org/0000-0002-0079-7668"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Chaoyou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133552298","display_name":"Tong Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Tong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133503292","display_name":"Enhong Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Enhong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5133486885"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9732000231742859,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9732000231742859,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.00570000009611249,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.00570000009611249,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5756000280380249},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.5221999883651733},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5123999714851379},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.48030000925064087},{"id":"https://openalex.org/keywords/active-appearance-model","display_name":"Active appearance model","score":0.4691999852657318},{"id":"https://openalex.org/keywords/active-vision","display_name":"Active vision","score":0.40560001134872437},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.384799987077713},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.3788999915122986}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.715499997138977},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6237999796867371},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5756000280380249},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.5221999883651733},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5123999714851379},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.48030000925064087},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.4691999852657318},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4537999927997589},{"id":"https://openalex.org/C193611912","wikidata":"https://www.wikidata.org/wiki/Q4677596","display_name":"Active vision","level":2,"score":0.40560001134872437},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.384799987077713},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.3788999915122986},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3538999855518341},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.3522999882698059},{"id":"https://openalex.org/C2776035688","wikidata":"https://www.wikidata.org/wiki/Q1606558","display_name":"Affect (linguistics)","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C2779321571","wikidata":"https://www.wikidata.org/wiki/Q7936605","display_name":"Visual learning","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2603999972343445},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08990","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08990","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08990","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08990","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2],"Multimodal":[3],"Large":[4],"Language":[5],"Models":[6],"(MLLMs)":[7],"have":[8],"created":[9],"new":[10],"opportunities":[11],"for":[12,55,86,147],"facial":[13,42,57,100],"expression":[14],"recognition":[15],"(FER),":[16],"moving":[17],"it":[18],"beyond":[19],"pure":[20],"label":[21],"prediction":[22,202],"toward":[23],"reasoning-based":[24],"affect":[25],"understanding.":[26],"However,":[27],"existing":[28],"MLLM-based":[29,195],"FER":[30,71,196],"methods":[31],"still":[32],"follow":[33],"a":[34,107,120],"passive":[35,194],"paradigm:":[36],"they":[37],"rely":[38],"on":[39],"externally":[40],"prepared":[41],"inputs":[43],"and":[44,89,97,104,150,176,198],"perform":[45],"single-pass":[46],"reasoning":[47],"over":[48,99,180],"fixed":[49],"visual":[50,74,108],"evidence,":[51],"without":[52],"the":[53,181],"capability":[54],"active":[56,73],"perception.":[58],"To":[59,110],"address":[60],"this":[61],"limitation,":[62],"we":[63,114],"propose":[64],"ActFER,":[65],"an":[66],"agentic":[67,126],"framework":[68],"that":[69,187],"reformulates":[70],"as":[72],"evidence":[75],"acquisition":[76],"followed":[77],"by":[78],"multimodal":[79],"reasoning.":[80],"Specifically,":[81],"ActFER":[82,167,188],"dynamically":[83],"invokes":[84],"tools":[85],"face":[87],"detection":[88],"alignment,":[90],"selectively":[91],"zooms":[92],"into":[93],"informative":[94],"local":[95,148,172],"regions,":[96],"reasons":[98],"Action":[101],"Units":[102],"(AUs)":[103],"emotions":[105],"through":[106],"Chain-of-Thought.":[109],"realize":[111],"such":[112],"behavior,":[113],"further":[115],"develop":[116],"Utility-Calibrated":[117],"GRPO":[118],"(UC-GRPO),":[119],"reinforcement":[121],"learning":[122],"algorithm":[123,165],"tailored":[124],"to":[125,134,141,154,168,178],"FER.":[127],"UC-GRPO":[128,191],"uses":[129],"AU-grounded":[130],"multi-level":[131],"verifiable":[132],"rewards":[133],"densify":[135],"supervision,":[136],"query-conditional":[137],"contrastive":[138],"utility":[139,157],"estimation":[140],"enable":[142],"sample-aware":[143],"dynamic":[144],"credit":[145],"assignment":[146],"inspection,":[149],"emotion-aware":[151],"EMA":[152],"calibration":[153],"reduce":[155],"noisy":[156],"estimates":[158],"while":[159],"capturing":[160],"emotion-wise":[161],"inspection":[162,173],"tendencies.":[163],"This":[164],"enables":[166],"learn":[169],"both":[170],"when":[171],"is":[174],"beneficial":[175],"how":[177],"reason":[179],"acquired":[182],"evidence.":[183],"Comprehensive":[184],"experiments":[185],"show":[186],"trained":[189],"with":[190],"consistently":[192],"outperforms":[193],"baselines":[197],"substantially":[199],"improves":[200],"AU":[201],"accuracy.":[203]},"counts_by_year":[],"updated_date":"2026-04-14T06:08:25.285971","created_date":"2026-04-14T00:00:00"}
