{"id":"https://openalex.org/W7138272509","doi":"https://doi.org/10.1609/aaai.v40i27.39456","title":"Balancing Multimodal Domain Generalization via Gradient Modulation and Projection","display_name":"Balancing Multimodal Domain Generalization via Gradient Modulation and Projection","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138272509","doi":"https://doi.org/10.1609/aaai.v40i27.39456"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i27.39456","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39456","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i27.39456","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129747997","display_name":"Hongzhao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hongzhao Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058851479","display_name":"Guohao 
Shen","orcid":"https://orcid.org/0000-0002-5277-0176"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guohao Shen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068024080","display_name":"Shupan Li","orcid":"https://orcid.org/0000-0002-5823-2037"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shupan Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129676901","display_name":"Mingliang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mingliang Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129736839","display_name":"Muhammad Haris Khan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muhammad Haris Khan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":["https://openalex.org/A5129747997"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.48207598,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"27","first_page":"22922","last_page":"22930"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9707000255584717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical 
Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9707000255584717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.010300000198185444,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0017999999690800905,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.8001999855041504},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.7314000129699707},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5515999794006348},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5410000085830688},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.5044000148773193},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5016999840736389},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast 
(vision)","score":0.4058000147342682},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.38659998774528503}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.8001999855041504},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.7314000129699707},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7204999923706055},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6514999866485596},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5515999794006348},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5410000085830688},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.5044000148773193},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5016999840736389},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.4058000147342682},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39590001106262207},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer 
interaction)","level":2,"score":0.38659998774528503},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.34450000524520874},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.33570000529289246},{"id":"https://openalex.org/C10494615","wikidata":"https://www.wikidata.org/wiki/Q17086765","display_name":"Proximal Gradient Methods","level":4,"score":0.32670000195503235},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3208000063896179},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.31679999828338623},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3001999855041504},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term 
(time)","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.26589998602867126}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i27.39456","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39456","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i27.39456","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39456","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 
AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"Domain":[1],"Generalization":[2],"(MMDG)":[3],"leverages":[4],"the":[5,47,69,92,153],"complementary":[6],"strengths":[7],"of":[8,156],"multiple":[9,188],"modalities":[10,28,44,88],"to":[11,38,45,72,98],"enhance":[12],"model":[13],"generalization":[14,186],"on":[15,64,68,79,91,140],"unseen":[16,99],"domains.":[17],"A":[18],"central":[19],"challenge":[20],"in":[21,86,121],"multimodal":[22],"learning":[23,48,165],"is":[24],"optimization":[25,120],"imbalance,":[26],"where":[27],"converge":[29],"at":[30],"different":[31],"speeds":[32],"during":[33],"training.":[34],"This":[35],"imbalance":[36],"leads":[37],"unequal":[39],"gradient":[40,61,138,149],"contributions,":[41],"allowing":[42],"some":[43],"dominate":[46],"process":[49],"while":[50],"others":[51],"lag":[52],"behind.":[53],"Existing":[54],"balancing":[55],"strategies":[56],"typically":[57],"regulate":[58],"each":[59,136,157],"modality\u2019s":[60,137],"contribution":[62],"based":[63,139],"its":[65],"classification":[66,129,162],"performance":[67,176],"source":[70,93],"domain":[71,94,143],"alleviate":[73],"this":[74,106],"issue.":[75],"However,":[76],"relying":[77],"solely":[78],"source-domain":[80],"accuracy":[81],"neglects":[82],"a":[83,114],"key":[84],"insight":[85],"MMDG:":[87],"that":[89,117,172],"excel":[90],"may":[95],"generalize":[96],"poorly":[97],"domains,":[100],"limiting":[101],"cross-domain":[102],"gains.":[103],"To":[104],"overcome":[105],"limitation,":[107],"we":[108],"propose":[109],"Gradient":[110],"Modulation":[111],"Projection":[112],"(GMP),":[113],"unified":[115],"strategy":[116],"promotes":[118],"balanced":[119],"MMDG.":[122],"GMP":[123,146,173],"first":[124],"decouples":[125],"gradients
":[126],"associated":[127],"with":[128,180],"and":[130,142,163,177],"domain-invariance":[131],"objectives.":[132],"It":[133],"then":[134],"modulates":[135],"semantic":[141],"confidence.":[144],"Moreover,":[145],"dynamically":[147],"adjusts":[148],"projections":[150],"by":[151],"tracking":[152],"relative":[154],"strength":[155],"task,":[158],"mitigating":[159],"conflicts":[160],"between":[161],"domain-invariant":[164],"within":[166],"modality-specific":[167],"encoders.":[168],"Extensive":[169],"experiments":[170],"demonstrate":[171],"achieves":[174],"state-of-the-art":[175],"integrates":[178],"flexibly":[179],"diverse":[181],"MMDG":[182],"methods,":[183],"significantly":[184],"improving":[185],"across":[187],"benchmarks.":[189]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
