{"id":"https://openalex.org/W7140093174","doi":"https://doi.org/10.48550/arxiv.2603.19718","title":"BALM: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates","display_name":"BALM: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W7140093174","doi":"https://doi.org/10.48550/arxiv.2603.19718"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.19718","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19718","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.19718","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128785771","display_name":"Phuong-Anh Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nguyen, Phuong-Anh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128775103","display_name":"Tien Anh Pham","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pham, Tien Anh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130333117","display_name":"Duc-Trong Le","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Le, Duc-Trong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130331945","display_name":"Cam-Van Thi Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Cam-Van Thi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5128785771"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.8533999919891357,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.8533999919891357,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.03539999946951866,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.020600000396370888,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.8026000261306763},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6912000179290771},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5658000111579895},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5611000061035156},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.554099977016449},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4862000048160553},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.478300005197525},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.46639999747276306}],"concepts":[{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.8026000261306763},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7213000059127808},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6912000179290771},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6790000200271606},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5658000111579895},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5611000061035156},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.554099977016449},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5393999814987183},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4862000048160553},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.478300005197525},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.46639999747276306},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.43529999256134033},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3407999873161316},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.31299999356269836},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.3061999976634979},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.19718","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19718","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.19718","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19718","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Learning":[0],"from":[1,6,51,114],"multiple":[2,141],"modalities":[3,10,18,107],"often":[4],"suffers":[5],"imbalance,":[7],"where":[8,33],"information-rich":[9],"dominate":[11],"optimization":[12],"while":[13],"weaker":[14],"or":[15],"partially":[16],"missing":[17,30,95,155],"contribute":[19],"less.":[20],"This":[21],"imbalance":[22,157],"becomes":[23],"severe":[24],"in":[25],"realistic":[26],"settings":[27],"with":[28,38],"imbalanced":[29],"rates":[31],"(IMR),":[32],"each":[34],"modality":[35],"is":[36],"absent":[37],"different":[39],"probabilities,":[40],"distorting":[41],"representation":[42,91],"learning":[43,66,104],"and":[44,55,112,117,150,156],"gradient":[45,110],"dynamics.":[46],"We":[47],"revisit":[48],"this":[49],"issue":[50],"a":[52,58,89],"training-process":[53],"perspective":[54],"propose":[56],"BALM,":[57],"model-agnostic":[59],"plug-in":[60],"framework":[61,70],"to":[62,87],"achieve":[63],"balanced":[64],"multimodal":[65,129],"under":[67,153],"IMR.":[68],"The":[69],"comprises":[71],"two":[72],"complementary":[73],"modules:":[74],"the":[75,97],"Feature":[76],"Calibration":[77],"Module":[78,100],"(FCM),":[79],"which":[80,102],"recalibrates":[81],"unimodal":[82],"features":[83],"using":[84],"global":[85],"context":[86],"establish":[88],"shared":[90],"basis":[92],"across":[93,106,140],"heterogeneous":[94],"patterns;":[96],"Gradient":[98],"Rebalancing":[99],"(GRM),":[101],"balances":[103],"dynamics":[105],"by":[108],"modulating":[109],"magnitudes":[111],"directions":[113],"both":[115],"distributional":[116],"spatial":[118],"perspectives.":[119],"BALM":[120,146],"can":[121],"be":[122],"seamlessly":[123],"integrated":[124],"into":[125],"diverse":[126,154],"backbones,":[127],"including":[128],"emotion":[130],"recognition":[131],"(MER)":[132],"models,":[133],"without":[134],"altering":[135],"their":[136],"architectures.":[137],"Experimental":[138],"results":[139],"MER":[142],"benchmarks":[143],"confirm":[144],"that":[145],"consistently":[147],"enhances":[148],"robustness":[149],"improves":[151],"performance":[152],"settings.":[158],"Code":[159],"available":[160],"at:":[161],"https://github.com/np4s/BALM_CVPR2026.git":[162]},"counts_by_year":[],"updated_date":"2026-04-30T09:15:22.047038","created_date":"2026-03-24T00:00:00"}
