{"id":"https://openalex.org/W4415537014","doi":"https://doi.org/10.1145/3746027.3754954","title":"Towards Robust Multimodal Domain Generalization via Modality-Domain Joint Adversarial Training","display_name":"Towards Robust Multimodal Domain Generalization via Modality-Domain Joint Adversarial Training","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415537014","doi":"https://doi.org/10.1145/3746027.3754954"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3754954","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008871967","display_name":"Hongzhao Li","orcid":"https://orcid.org/0009-0002-6611-7896"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongzhao Li","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114196036","display_name":"Haohao Wan","orcid":null},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hualei Wan","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085128419","display_name":"Liangzhi Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liangzhi Zhang","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017257056","display_name":"Mingyuan Jiu","orcid":"https://orcid.org/0000-0002-4868-0709"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyuan Jiu","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068024080","display_name":"Shupan Li","orcid":"https://orcid.org/0000-0002-5823-2037"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shupan Li","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081346568","display_name":"Mingliang Xu","orcid":"https://orcid.org/0000-0002-6885-3451"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingliang Xu","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032830353","display_name":"Muhammad Haris Khan","orcid":"https://orcid.org/0000-0001-9746-276X"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Muhammad Haris Khan","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence, Abu Dhabi, United Arab Emirates"],"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence, Abu Dhabi, United Arab Emirates","institution_ids":["https://openalex.org/I4210113480"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5008871967"],"corresponding_institution_ids":["https://openalex.org/I38877650"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16316752,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"180","last_page":"188"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7233999967575073},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6398000121116638},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5806000232696533},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.5257999897003174},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5023999810218811},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.49470001459121704},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4657999873161316},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.43529999256134033},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.38359999656677246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7278000116348267},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7233999967575073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6420999765396118},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6398000121116638},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5806000232696533},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.5257999897003174},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5087000131607056},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5023999810218811},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.49470001459121704},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4657999873161316},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.43529999256134033},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.38359999656677246},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.38019999861717224},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3671000003814697},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.365200012922287},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.314300000667572},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.25850000977516174},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25380000472068787},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3754954","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2798658180","https://openalex.org/W2958360136","https://openalex.org/W2998712190","https://openalex.org/W3015371781","https://openalex.org/W3153511633","https://openalex.org/W4391679974","https://openalex.org/W4399055279","https://openalex.org/W4403780748","https://openalex.org/W4403791574"],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"Domain":[1],"Generalization":[2],"(MMDG)":[3],"aims":[4],"to":[5,49],"enhance":[6],"the":[7,93,120,133,144],"robustness":[8],"of":[9,135],"multimodal":[10,86],"models":[11],"against":[12],"distribution":[13],"shifts":[14],"in":[15,82,92],"unseen":[16],"target":[17],"domains.":[18],"Unlike":[19],"unimodal":[20],"domain":[21,29,50,80],"generalization":[22],"methods,":[23],"which":[24],"primarily":[25],"focus":[26],"on":[27,109,114,143],"mitigating":[28],"bias":[30],"within":[31],"individual":[32],"modalities,":[33],"MMDG":[34,156],"faces":[35],"unique":[36],"challenges,":[37,55],"notably":[38],"modality":[39,106],"heterogeneity":[40],"(divergent":[41],"feature":[42],"spaces)":[43],"and":[44,85,96,146],"stability":[45],"discrepancy":[46],"(varying":[47],"sensitivity":[48],"shifts).":[51],"To":[52],"tackle":[53],"these":[54,67],"we":[56,118],"propose":[57],"Modality-Domain":[58],"Joint":[59],"Adversarial":[60],"Training,":[61],"a":[62,74,98,128],"unified":[63],"framework":[64],"that":[65,78,103,131],"addresses":[66],"challenges":[68],"through":[69],"two":[70],"key":[71],"innovations:":[72],"(1)":[73],"tri-discriminator":[75],"adversarial":[76],"module":[77],"mitigates":[79],"biases":[81],"both":[83],"modality-specific":[84],"representations,":[87],"while":[88,149],"suppressing":[89],"modality-heterogeneous":[90],"patterns":[91],"representation":[94],"space;":[95],"(2)":[97],"stability-aware":[99],"dynamic":[100],"weighting":[101],"mechanism":[102],"adaptively":[104],"balances":[105],"contributions":[107],"based":[108],"cross-domain":[110],"stability,":[111],"reducing":[112],"reliance":[113],"unstable":[115],"modalities.":[116],"Additionally,":[117],"provide":[119],"first":[121],"theoretical":[122,129],"error":[123],"bound":[124],"for":[125],"MMDG,":[126],"offering":[127],"foundation":[130],"supports":[132],"effectiveness":[134],"our":[136],"approach.":[137],"Our":[138],"approach":[139],"achieves":[140],"state-of-the-art":[141],"performance":[142],"EPIC-Kitchens":[145],"HAC":[147],"datasets":[148],"using":[150],"75.2%":[151],"fewer":[152],"parameters":[153],"than":[154],"previous":[155],"methods.":[157],"The":[158],"source":[159],"code":[160],"is":[161],"available":[162],"at":[163],"https://github.com/lihongzhao99/MMDG-Joint-Adversarial-Training.":[164]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
