{"id":"https://openalex.org/W7147034341","doi":"https://doi.org/10.48550/arxiv.2603.26908","title":"FusionAgent: A Multimodal Agent with Dynamic Model Selection for Human Recognition","display_name":"FusionAgent: A Multimodal Agent with Dynamic Model Selection for Human Recognition","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7147034341","doi":"https://doi.org/10.48550/arxiv.2603.26908"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26908","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26908","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26908","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132645020","display_name":"Jie Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132613156","display_name":"Xiao Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Xiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132622112","display_name":"Yiyang Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Yiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132621263","display_name":"Anil Jain","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jain, Anil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132578799","display_name":"Xiaoming Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xiaoming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9327999949455261,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9327999949455261,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.019700000062584877,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.0071000000461936,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5023999810218811},{"id":"https://openalex.org/keywords/biometrics","display_name":"Biometrics","score":0.48739999532699585},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.48010000586509705},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4296000003814697},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.39500001072883606},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.38530001044273376},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.351500004529953},{"id":"https://openalex.org/keywords/model-selection","display_name":"Model selection","score":0.35089999437332153}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7300999760627747},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7024999856948853},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.542900025844574},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5023999810218811},{"id":"https://openalex.org/C184297639","wikidata":"https://www.wikidata.org/wiki/Q177765","display_name":"Biometrics","level":2,"score":0.48739999532699585},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.48010000586509705},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4296000003814697},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.39500001072883606},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.38530001044273376},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.351500004529953},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.35089999437332153},{"id":"https://openalex.org/C74370796","wikidata":"https://www.wikidata.org/wiki/Q15924863","display_name":"Signature recognition","level":3,"score":0.3418999910354614},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3407999873161316},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.29679998755455017},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2648000121116638}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26908","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26908","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26908","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26908","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Model":[0,83],"fusion":[1,185],"is":[2,19,94],"a":[3,73,79,97,105,150],"key":[4],"strategy":[5],"for":[6,22,55,118],"robust":[7,183],"recognition":[8,188],"in":[9,149,186],"unconstrained":[10],"scenarios,":[11],"as":[12,30,96],"different":[13],"models":[14,54],"provide":[15],"complementary":[16,147],"strengths.":[17],"This":[18],"especially":[20],"important":[21],"whole-body":[23,157],"human":[24],"recognition,":[25],"where":[26],"biometric":[27,158],"cues":[28],"such":[29],"face,":[31],"gait,":[32],"and":[33,39,99,128,145,182],"body":[34],"shape":[35],"vary":[36],"across":[37],"samples":[38],"are":[40,49],"typically":[41],"integrated":[42],"via":[43],"score-fusion.":[44],"However,":[45],"existing":[46],"score-fusion":[47],"strategies":[48],"usually":[50],"static,":[51],"invoking":[52],"all":[53],"every":[56],"test":[57,120],"sample":[58,61],"regardless":[59],"of":[60,179],"quality":[62],"or":[63],"modality":[64],"reliability.":[65],"To":[66,122],"overcome":[67],"these":[68],"limitations,":[69],"we":[70,131],"propose":[71],"\\textbf{FusionAgent},":[72],"novel":[74],"agentic":[75],"framework":[76],"that":[77,161],"leverages":[78],"Multimodal":[80],"Large":[81],"Language":[82],"(MLLM)":[84],"to":[85,111],"perform":[86],"dynamic,":[87,180],"sample-specific":[88],"model":[89,93,116,125,144,173,184],"selection.":[90],"Each":[91],"expert":[92],"treated":[95],"tool,":[98],"through":[100,171],"Reinforcement":[101],"Fine-Tuning":[102],"(RFT)":[103],"with":[104],"metric-based":[106],"reward,":[107],"the":[108,114,124,141,176],"agent":[109],"learns":[110],"adaptively":[112],"determine":[113],"optimal":[115],"combination":[117],"each":[119],"input.":[121],"address":[123],"score":[126],"misalignment":[127],"embedding":[129],"heterogeneity,":[130],"introduce":[132],"Anchor-based":[133],"Confidence":[134],"Top-k":[135],"(ACT)":[136],"score-fusion,":[137],"which":[138],"anchors":[139],"on":[140,155],"most":[142],"confident":[143],"integrates":[146],"predictions":[148],"confidence-aware":[151],"manner.":[152],"Extensive":[153],"experiments":[154],"multiple":[156],"benchmarks":[159],"demonstrate":[160],"FusionAgent":[162],"significantly":[163],"outperforms":[164],"SoTA":[165],"methods":[166],"while":[167],"achieving":[168],"higher":[169],"efficiency":[170],"fewer":[172],"invocations,":[174],"underscoring":[175],"critical":[177],"role":[178],"explainable,":[181],"real-world":[187],"systems.":[189],"Project":[190],"page:":[191],"\\href{https://fusionagent.github.io/}{FusionAgent}.":[192]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-02T00:00:00"}
